xref: /dragonfly/sys/dev/drm/radeon/si.c (revision f503b4c4)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  *
24  * $FreeBSD: head/sys/dev/drm2/radeon/si.c 254885 2013-08-25 19:37:15Z dumbbell $
25  */
26 
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <uapi_drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 
35 #define SI_PFP_UCODE_SIZE 2144
36 #define SI_PM4_UCODE_SIZE 2144
37 #define SI_CE_UCODE_SIZE 2144
38 #define SI_RLC_UCODE_SIZE 2048
39 #define SI_MC_UCODE_SIZE 7769
40 
41 /* get temperature in millidegrees */
42 int si_get_temp(struct radeon_device *rdev)
43 {
44 	u32 temp;
45 	int actual_temp = 0;
46 
47 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
48 		CTF_TEMP_SHIFT;
49 
50 	if (temp & 0x200)
51 		actual_temp = 255;
52 	else
53 		actual_temp = temp & 0x1ff;
54 
55 	actual_temp = (actual_temp * 1000);
56 
57 	return actual_temp;
58 }
59 
60 #define TAHITI_IO_MC_REGS_SIZE 36
61 
62 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
63 	{0x0000006f, 0x03044000},
64 	{0x00000070, 0x0480c018},
65 	{0x00000071, 0x00000040},
66 	{0x00000072, 0x01000000},
67 	{0x00000074, 0x000000ff},
68 	{0x00000075, 0x00143400},
69 	{0x00000076, 0x08ec0800},
70 	{0x00000077, 0x040000cc},
71 	{0x00000079, 0x00000000},
72 	{0x0000007a, 0x21000409},
73 	{0x0000007c, 0x00000000},
74 	{0x0000007d, 0xe8000000},
75 	{0x0000007e, 0x044408a8},
76 	{0x0000007f, 0x00000003},
77 	{0x00000080, 0x00000000},
78 	{0x00000081, 0x01000000},
79 	{0x00000082, 0x02000000},
80 	{0x00000083, 0x00000000},
81 	{0x00000084, 0xe3f3e4f4},
82 	{0x00000085, 0x00052024},
83 	{0x00000087, 0x00000000},
84 	{0x00000088, 0x66036603},
85 	{0x00000089, 0x01000000},
86 	{0x0000008b, 0x1c0a0000},
87 	{0x0000008c, 0xff010000},
88 	{0x0000008e, 0xffffefff},
89 	{0x0000008f, 0xfff3efff},
90 	{0x00000090, 0xfff3efbf},
91 	{0x00000094, 0x00101101},
92 	{0x00000095, 0x00000fff},
93 	{0x00000096, 0x00116fff},
94 	{0x00000097, 0x60010000},
95 	{0x00000098, 0x10010000},
96 	{0x00000099, 0x00006000},
97 	{0x0000009a, 0x00001000},
98 	{0x0000009f, 0x00a77400}
99 };
100 
101 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
102 	{0x0000006f, 0x03044000},
103 	{0x00000070, 0x0480c018},
104 	{0x00000071, 0x00000040},
105 	{0x00000072, 0x01000000},
106 	{0x00000074, 0x000000ff},
107 	{0x00000075, 0x00143400},
108 	{0x00000076, 0x08ec0800},
109 	{0x00000077, 0x040000cc},
110 	{0x00000079, 0x00000000},
111 	{0x0000007a, 0x21000409},
112 	{0x0000007c, 0x00000000},
113 	{0x0000007d, 0xe8000000},
114 	{0x0000007e, 0x044408a8},
115 	{0x0000007f, 0x00000003},
116 	{0x00000080, 0x00000000},
117 	{0x00000081, 0x01000000},
118 	{0x00000082, 0x02000000},
119 	{0x00000083, 0x00000000},
120 	{0x00000084, 0xe3f3e4f4},
121 	{0x00000085, 0x00052024},
122 	{0x00000087, 0x00000000},
123 	{0x00000088, 0x66036603},
124 	{0x00000089, 0x01000000},
125 	{0x0000008b, 0x1c0a0000},
126 	{0x0000008c, 0xff010000},
127 	{0x0000008e, 0xffffefff},
128 	{0x0000008f, 0xfff3efff},
129 	{0x00000090, 0xfff3efbf},
130 	{0x00000094, 0x00101101},
131 	{0x00000095, 0x00000fff},
132 	{0x00000096, 0x00116fff},
133 	{0x00000097, 0x60010000},
134 	{0x00000098, 0x10010000},
135 	{0x00000099, 0x00006000},
136 	{0x0000009a, 0x00001000},
137 	{0x0000009f, 0x00a47400}
138 };
139 
140 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
141 	{0x0000006f, 0x03044000},
142 	{0x00000070, 0x0480c018},
143 	{0x00000071, 0x00000040},
144 	{0x00000072, 0x01000000},
145 	{0x00000074, 0x000000ff},
146 	{0x00000075, 0x00143400},
147 	{0x00000076, 0x08ec0800},
148 	{0x00000077, 0x040000cc},
149 	{0x00000079, 0x00000000},
150 	{0x0000007a, 0x21000409},
151 	{0x0000007c, 0x00000000},
152 	{0x0000007d, 0xe8000000},
153 	{0x0000007e, 0x044408a8},
154 	{0x0000007f, 0x00000003},
155 	{0x00000080, 0x00000000},
156 	{0x00000081, 0x01000000},
157 	{0x00000082, 0x02000000},
158 	{0x00000083, 0x00000000},
159 	{0x00000084, 0xe3f3e4f4},
160 	{0x00000085, 0x00052024},
161 	{0x00000087, 0x00000000},
162 	{0x00000088, 0x66036603},
163 	{0x00000089, 0x01000000},
164 	{0x0000008b, 0x1c0a0000},
165 	{0x0000008c, 0xff010000},
166 	{0x0000008e, 0xffffefff},
167 	{0x0000008f, 0xfff3efff},
168 	{0x00000090, 0xfff3efbf},
169 	{0x00000094, 0x00101101},
170 	{0x00000095, 0x00000fff},
171 	{0x00000096, 0x00116fff},
172 	{0x00000097, 0x60010000},
173 	{0x00000098, 0x10010000},
174 	{0x00000099, 0x00006000},
175 	{0x0000009a, 0x00001000},
176 	{0x0000009f, 0x00a37400}
177 };
178 
179 /* ucode loading */
180 static int si_mc_load_microcode(struct radeon_device *rdev)
181 {
182 	const __be32 *fw_data;
183 	u32 running, blackout = 0;
184 	u32 *io_mc_regs;
185 	int i, ucode_size, regs_size;
186 
187 	if (!rdev->mc_fw)
188 		return -EINVAL;
189 
190 	switch (rdev->family) {
191 	case CHIP_TAHITI:
192 		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
193 		ucode_size = SI_MC_UCODE_SIZE;
194 		regs_size = TAHITI_IO_MC_REGS_SIZE;
195 		break;
196 	case CHIP_PITCAIRN:
197 		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
198 		ucode_size = SI_MC_UCODE_SIZE;
199 		regs_size = TAHITI_IO_MC_REGS_SIZE;
200 		break;
201 	case CHIP_VERDE:
202 	default:
203 		io_mc_regs = (u32 *)&verde_io_mc_regs;
204 		ucode_size = SI_MC_UCODE_SIZE;
205 		regs_size = TAHITI_IO_MC_REGS_SIZE;
206 		break;
207 	}
208 
209 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
210 
211 	if (running == 0) {
212 		if (running) {
213 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
214 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
215 		}
216 
217 		/* reset the engine and set to writable */
218 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
219 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
220 
221 		/* load mc io regs */
222 		for (i = 0; i < regs_size; i++) {
223 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
224 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
225 		}
226 		/* load the MC ucode */
227 		fw_data = (const __be32 *)rdev->mc_fw->data;
228 		for (i = 0; i < ucode_size; i++)
229 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
230 
231 		/* put the engine back into the active state */
232 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
233 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
234 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
235 
236 		/* wait for training to complete */
237 		for (i = 0; i < rdev->usec_timeout; i++) {
238 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
239 				break;
240 			DRM_UDELAY(1);
241 		}
242 		for (i = 0; i < rdev->usec_timeout; i++) {
243 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
244 				break;
245 			DRM_UDELAY(1);
246 		}
247 
248 		if (running)
249 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
250 	}
251 
252 	return 0;
253 }
254 
255 static int si_init_microcode(struct radeon_device *rdev)
256 {
257 	const char *chip_name;
258 	const char *rlc_chip_name;
259 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
260 	char fw_name[30];
261 	int err;
262 
263 	DRM_DEBUG("\n");
264 
265 	switch (rdev->family) {
266 	case CHIP_TAHITI:
267 		chip_name = "TAHITI";
268 		rlc_chip_name = "TAHITI";
269 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
270 		me_req_size = SI_PM4_UCODE_SIZE * 4;
271 		ce_req_size = SI_CE_UCODE_SIZE * 4;
272 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
273 		mc_req_size = SI_MC_UCODE_SIZE * 4;
274 		break;
275 	case CHIP_PITCAIRN:
276 		chip_name = "PITCAIRN";
277 		rlc_chip_name = "PITCAIRN";
278 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
279 		me_req_size = SI_PM4_UCODE_SIZE * 4;
280 		ce_req_size = SI_CE_UCODE_SIZE * 4;
281 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
282 		mc_req_size = SI_MC_UCODE_SIZE * 4;
283 		break;
284 	case CHIP_VERDE:
285 		chip_name = "VERDE";
286 		rlc_chip_name = "VERDE";
287 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
288 		me_req_size = SI_PM4_UCODE_SIZE * 4;
289 		ce_req_size = SI_CE_UCODE_SIZE * 4;
290 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
291 		mc_req_size = SI_MC_UCODE_SIZE * 4;
292 		break;
293 	default: panic("%s: Unsupported family %d", __func__, rdev->family);
294 	}
295 
296 	DRM_INFO("Loading %s Microcode\n", chip_name);
297 	err = 0;
298 
299 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
300 	rdev->pfp_fw = firmware_get(fw_name);
301 	if (rdev->pfp_fw == NULL) {
302 		err = -ENOENT;
303 		goto out;
304 	}
305 	if (rdev->pfp_fw->datasize != pfp_req_size) {
306 		DRM_ERROR(
307 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
308 		       rdev->pfp_fw->datasize, fw_name);
309 		err = -EINVAL;
310 		goto out;
311 	}
312 
313 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
314 	rdev->me_fw = firmware_get(fw_name);
315 	if (rdev->me_fw == NULL) {
316 		err = -ENOENT;
317 		goto out;
318 	}
319 	if (rdev->me_fw->datasize != me_req_size) {
320 		DRM_ERROR(
321 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
322 		       rdev->me_fw->datasize, fw_name);
323 		err = -EINVAL;
324 	}
325 
326 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
327 	rdev->ce_fw = firmware_get(fw_name);
328 	if (rdev->ce_fw == NULL) {
329 		err = -ENOENT;
330 		goto out;
331 	}
332 	if (rdev->ce_fw->datasize != ce_req_size) {
333 		DRM_ERROR(
334 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
335 		       rdev->ce_fw->datasize, fw_name);
336 		err = -EINVAL;
337 	}
338 
339 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc",
340 		  rlc_chip_name);
341 	rdev->rlc_fw = firmware_get(fw_name);
342 	if (rdev->rlc_fw == NULL) {
343 		err = -ENOENT;
344 		goto out;
345 	}
346 	if (rdev->rlc_fw->datasize != rlc_req_size) {
347 		DRM_ERROR(
348 		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
349 		       rdev->rlc_fw->datasize, fw_name);
350 		err = -EINVAL;
351 	}
352 
353 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
354 	rdev->mc_fw = firmware_get(fw_name);
355 	if (rdev->mc_fw == NULL) {
356 		err = -ENOENT;
357 		goto out;
358 	}
359 	if (rdev->mc_fw->datasize != mc_req_size) {
360 		DRM_ERROR(
361 		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
362 		       rdev->mc_fw->datasize, fw_name);
363 		err = -EINVAL;
364 	}
365 
366 out:
367 	if (err) {
368 		if (err != -EINVAL)
369 			DRM_ERROR(
370 			       "si_cp: Failed to load firmware \"%s\"\n",
371 			       fw_name);
372 		if (rdev->pfp_fw != NULL) {
373 			firmware_put(rdev->pfp_fw, FIRMWARE_UNLOAD);
374 			rdev->pfp_fw = NULL;
375 		}
376 		if (rdev->me_fw != NULL) {
377 			firmware_put(rdev->me_fw, FIRMWARE_UNLOAD);
378 			rdev->me_fw = NULL;
379 		}
380 		if (rdev->ce_fw != NULL) {
381 			firmware_put(rdev->ce_fw, FIRMWARE_UNLOAD);
382 			rdev->ce_fw = NULL;
383 		}
384 		if (rdev->rlc_fw != NULL) {
385 			firmware_put(rdev->rlc_fw, FIRMWARE_UNLOAD);
386 			rdev->rlc_fw = NULL;
387 		}
388 		if (rdev->mc_fw != NULL) {
389 			firmware_put(rdev->mc_fw, FIRMWARE_UNLOAD);
390 			rdev->mc_fw = NULL;
391 		}
392 	}
393 	return err;
394 }
395 
396 /**
397  * si_fini_microcode - drop the firmwares image references
398  *
399  * @rdev: radeon_device pointer
400  *
401  * Drop the pfp, me, rlc, mc and ce firmware image references.
402  * Called at driver shutdown.
403  */
404 static void si_fini_microcode(struct radeon_device *rdev)
405 {
406 
407 	if (rdev->pfp_fw != NULL) {
408 		firmware_put(rdev->pfp_fw, FIRMWARE_UNLOAD);
409 		rdev->pfp_fw = NULL;
410 	}
411 
412 	if (rdev->me_fw != NULL) {
413 		firmware_put(rdev->me_fw, FIRMWARE_UNLOAD);
414 		rdev->me_fw = NULL;
415 	}
416 
417 	if (rdev->rlc_fw != NULL) {
418 		firmware_put(rdev->rlc_fw, FIRMWARE_UNLOAD);
419 		rdev->rlc_fw = NULL;
420 	}
421 
422 	if (rdev->mc_fw != NULL) {
423 		firmware_put(rdev->mc_fw, FIRMWARE_UNLOAD);
424 		rdev->mc_fw = NULL;
425 	}
426 
427 	if (rdev->ce_fw != NULL) {
428 		firmware_put(rdev->ce_fw, FIRMWARE_UNLOAD);
429 		rdev->ce_fw = NULL;
430 	}
431 }
432 
433 /* watermark setup */
434 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
435 				   struct radeon_crtc *radeon_crtc,
436 				   struct drm_display_mode *mode,
437 				   struct drm_display_mode *other_mode)
438 {
439 	u32 tmp;
440 	/*
441 	 * Line Buffer Setup
442 	 * There are 3 line buffers, each one shared by 2 display controllers.
443 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
444 	 * the display controllers.  The paritioning is done via one of four
445 	 * preset allocations specified in bits 21:20:
446 	 *  0 - half lb
447 	 *  2 - whole lb, other crtc must be disabled
448 	 */
449 	/* this can get tricky if we have two large displays on a paired group
450 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
451 	 * non-linked crtcs for maximum line buffer allocation.
452 	 */
453 	if (radeon_crtc->base.enabled && mode) {
454 		if (other_mode)
455 			tmp = 0; /* 1/2 */
456 		else
457 			tmp = 2; /* whole */
458 	} else
459 		tmp = 0;
460 
461 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
462 	       DC_LB_MEMORY_CONFIG(tmp));
463 
464 	if (radeon_crtc->base.enabled && mode) {
465 		switch (tmp) {
466 		case 0:
467 		default:
468 			return 4096 * 2;
469 		case 2:
470 			return 8192 * 2;
471 		}
472 	}
473 
474 	/* controller not enabled, so no lb used */
475 	return 0;
476 }
477 
478 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
479 {
480 	u32 tmp = RREG32(MC_SHARED_CHMAP);
481 
482 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
483 	case 0:
484 	default:
485 		return 1;
486 	case 1:
487 		return 2;
488 	case 2:
489 		return 4;
490 	case 3:
491 		return 8;
492 	case 4:
493 		return 3;
494 	case 5:
495 		return 6;
496 	case 6:
497 		return 10;
498 	case 7:
499 		return 12;
500 	case 8:
501 		return 16;
502 	}
503 }
504 
505 struct dce6_wm_params {
506 	u32 dram_channels; /* number of dram channels */
507 	u32 yclk;          /* bandwidth per dram data pin in kHz */
508 	u32 sclk;          /* engine clock in kHz */
509 	u32 disp_clk;      /* display clock in kHz */
510 	u32 src_width;     /* viewport width */
511 	u32 active_time;   /* active display time in ns */
512 	u32 blank_time;    /* blank time in ns */
513 	bool interlaced;    /* mode is interlaced */
514 	fixed20_12 vsc;    /* vertical scale ratio */
515 	u32 num_heads;     /* number of active crtcs */
516 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
517 	u32 lb_size;       /* line buffer allocated to pipe */
518 	u32 vtaps;         /* vertical scaler taps */
519 };
520 
521 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
522 {
523 	/* Calculate raw DRAM Bandwidth */
524 	fixed20_12 dram_efficiency; /* 0.7 */
525 	fixed20_12 yclk, dram_channels, bandwidth;
526 	fixed20_12 a;
527 
528 	a.full = dfixed_const(1000);
529 	yclk.full = dfixed_const(wm->yclk);
530 	yclk.full = dfixed_div(yclk, a);
531 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
532 	a.full = dfixed_const(10);
533 	dram_efficiency.full = dfixed_const(7);
534 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
535 	bandwidth.full = dfixed_mul(dram_channels, yclk);
536 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
537 
538 	return dfixed_trunc(bandwidth);
539 }
540 
541 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
542 {
543 	/* Calculate DRAM Bandwidth and the part allocated to display. */
544 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
545 	fixed20_12 yclk, dram_channels, bandwidth;
546 	fixed20_12 a;
547 
548 	a.full = dfixed_const(1000);
549 	yclk.full = dfixed_const(wm->yclk);
550 	yclk.full = dfixed_div(yclk, a);
551 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
552 	a.full = dfixed_const(10);
553 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
554 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
555 	bandwidth.full = dfixed_mul(dram_channels, yclk);
556 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
557 
558 	return dfixed_trunc(bandwidth);
559 }
560 
561 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
562 {
563 	/* Calculate the display Data return Bandwidth */
564 	fixed20_12 return_efficiency; /* 0.8 */
565 	fixed20_12 sclk, bandwidth;
566 	fixed20_12 a;
567 
568 	a.full = dfixed_const(1000);
569 	sclk.full = dfixed_const(wm->sclk);
570 	sclk.full = dfixed_div(sclk, a);
571 	a.full = dfixed_const(10);
572 	return_efficiency.full = dfixed_const(8);
573 	return_efficiency.full = dfixed_div(return_efficiency, a);
574 	a.full = dfixed_const(32);
575 	bandwidth.full = dfixed_mul(a, sclk);
576 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
577 
578 	return dfixed_trunc(bandwidth);
579 }
580 
581 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
582 {
583 	return 32;
584 }
585 
586 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
587 {
588 	/* Calculate the DMIF Request Bandwidth */
589 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
590 	fixed20_12 disp_clk, sclk, bandwidth;
591 	fixed20_12 a, b1, b2;
592 	u32 min_bandwidth;
593 
594 	a.full = dfixed_const(1000);
595 	disp_clk.full = dfixed_const(wm->disp_clk);
596 	disp_clk.full = dfixed_div(disp_clk, a);
597 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
598 	b1.full = dfixed_mul(a, disp_clk);
599 
600 	a.full = dfixed_const(1000);
601 	sclk.full = dfixed_const(wm->sclk);
602 	sclk.full = dfixed_div(sclk, a);
603 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
604 	b2.full = dfixed_mul(a, sclk);
605 
606 	a.full = dfixed_const(10);
607 	disp_clk_request_efficiency.full = dfixed_const(8);
608 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
609 
610 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
611 
612 	a.full = dfixed_const(min_bandwidth);
613 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
614 
615 	return dfixed_trunc(bandwidth);
616 }
617 
618 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
619 {
620 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
621 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
622 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
623 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
624 
625 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
626 }
627 
628 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
629 {
630 	/* Calculate the display mode Average Bandwidth
631 	 * DisplayMode should contain the source and destination dimensions,
632 	 * timing, etc.
633 	 */
634 	fixed20_12 bpp;
635 	fixed20_12 line_time;
636 	fixed20_12 src_width;
637 	fixed20_12 bandwidth;
638 	fixed20_12 a;
639 
640 	a.full = dfixed_const(1000);
641 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
642 	line_time.full = dfixed_div(line_time, a);
643 	bpp.full = dfixed_const(wm->bytes_per_pixel);
644 	src_width.full = dfixed_const(wm->src_width);
645 	bandwidth.full = dfixed_mul(src_width, bpp);
646 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
647 	bandwidth.full = dfixed_div(bandwidth, line_time);
648 
649 	return dfixed_trunc(bandwidth);
650 }
651 
652 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
653 {
654 	/* First calcualte the latency in ns */
655 	u32 mc_latency = 2000; /* 2000 ns. */
656 	u32 available_bandwidth = dce6_available_bandwidth(wm);
657 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
658 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
659 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
660 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
661 		(wm->num_heads * cursor_line_pair_return_time);
662 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
663 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
664 	u32 tmp, dmif_size = 12288;
665 	fixed20_12 a, b, c;
666 
667 	if (wm->num_heads == 0)
668 		return 0;
669 
670 	a.full = dfixed_const(2);
671 	b.full = dfixed_const(1);
672 	if ((wm->vsc.full > a.full) ||
673 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
674 	    (wm->vtaps >= 5) ||
675 	    ((wm->vsc.full >= a.full) && wm->interlaced))
676 		max_src_lines_per_dst_line = 4;
677 	else
678 		max_src_lines_per_dst_line = 2;
679 
680 	a.full = dfixed_const(available_bandwidth);
681 	b.full = dfixed_const(wm->num_heads);
682 	a.full = dfixed_div(a, b);
683 
684 	b.full = dfixed_const(mc_latency + 512);
685 	c.full = dfixed_const(wm->disp_clk);
686 	b.full = dfixed_div(b, c);
687 
688 	c.full = dfixed_const(dmif_size);
689 	b.full = dfixed_div(c, b);
690 
691 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
692 
693 	b.full = dfixed_const(1000);
694 	c.full = dfixed_const(wm->disp_clk);
695 	b.full = dfixed_div(c, b);
696 	c.full = dfixed_const(wm->bytes_per_pixel);
697 	b.full = dfixed_mul(b, c);
698 
699 	lb_fill_bw = min(tmp, dfixed_trunc(b));
700 
701 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
702 	b.full = dfixed_const(1000);
703 	c.full = dfixed_const(lb_fill_bw);
704 	b.full = dfixed_div(c, b);
705 	a.full = dfixed_div(a, b);
706 	line_fill_time = dfixed_trunc(a);
707 
708 	if (line_fill_time < wm->active_time)
709 		return latency;
710 	else
711 		return latency + (line_fill_time - wm->active_time);
712 
713 }
714 
715 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
716 {
717 	if (dce6_average_bandwidth(wm) <=
718 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
719 		return true;
720 	else
721 		return false;
722 };
723 
724 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
725 {
726 	if (dce6_average_bandwidth(wm) <=
727 	    (dce6_available_bandwidth(wm) / wm->num_heads))
728 		return true;
729 	else
730 		return false;
731 };
732 
733 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
734 {
735 	u32 lb_partitions = wm->lb_size / wm->src_width;
736 	u32 line_time = wm->active_time + wm->blank_time;
737 	u32 latency_tolerant_lines;
738 	u32 latency_hiding;
739 	fixed20_12 a;
740 
741 	a.full = dfixed_const(1);
742 	if (wm->vsc.full > a.full)
743 		latency_tolerant_lines = 1;
744 	else {
745 		if (lb_partitions <= (wm->vtaps + 1))
746 			latency_tolerant_lines = 1;
747 		else
748 			latency_tolerant_lines = 2;
749 	}
750 
751 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
752 
753 	if (dce6_latency_watermark(wm) <= latency_hiding)
754 		return true;
755 	else
756 		return false;
757 }
758 
759 static void dce6_program_watermarks(struct radeon_device *rdev,
760 					 struct radeon_crtc *radeon_crtc,
761 					 u32 lb_size, u32 num_heads)
762 {
763 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
764 	struct dce6_wm_params wm;
765 	u32 pixel_period;
766 	u32 line_time = 0;
767 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
768 	u32 priority_a_mark = 0, priority_b_mark = 0;
769 	u32 priority_a_cnt = PRIORITY_OFF;
770 	u32 priority_b_cnt = PRIORITY_OFF;
771 	u32 tmp, arb_control3;
772 	fixed20_12 a, b, c;
773 
774 	if (radeon_crtc->base.enabled && num_heads && mode) {
775 		pixel_period = 1000000 / (u32)mode->clock;
776 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
777 		priority_a_cnt = 0;
778 		priority_b_cnt = 0;
779 
780 		wm.yclk = rdev->pm.current_mclk * 10;
781 		wm.sclk = rdev->pm.current_sclk * 10;
782 		wm.disp_clk = mode->clock;
783 		wm.src_width = mode->crtc_hdisplay;
784 		wm.active_time = mode->crtc_hdisplay * pixel_period;
785 		wm.blank_time = line_time - wm.active_time;
786 		wm.interlaced = false;
787 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
788 			wm.interlaced = true;
789 		wm.vsc = radeon_crtc->vsc;
790 		wm.vtaps = 1;
791 		if (radeon_crtc->rmx_type != RMX_OFF)
792 			wm.vtaps = 2;
793 		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
794 		wm.lb_size = lb_size;
795 		if (rdev->family == CHIP_ARUBA)
796 			wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
797 		else
798 			wm.dram_channels = si_get_number_of_dram_channels(rdev);
799 		wm.num_heads = num_heads;
800 
801 		/* set for high clocks */
802 		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
803 		/* set for low clocks */
804 		/* wm.yclk = low clk; wm.sclk = low clk */
805 		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);
806 
807 		/* possibly force display priority to high */
808 		/* should really do this at mode validation time... */
809 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
810 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
811 		    !dce6_check_latency_hiding(&wm) ||
812 		    (rdev->disp_priority == 2)) {
813 			DRM_DEBUG_KMS("force priority to high\n");
814 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
815 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
816 		}
817 
818 		a.full = dfixed_const(1000);
819 		b.full = dfixed_const(mode->clock);
820 		b.full = dfixed_div(b, a);
821 		c.full = dfixed_const(latency_watermark_a);
822 		c.full = dfixed_mul(c, b);
823 		c.full = dfixed_mul(c, radeon_crtc->hsc);
824 		c.full = dfixed_div(c, a);
825 		a.full = dfixed_const(16);
826 		c.full = dfixed_div(c, a);
827 		priority_a_mark = dfixed_trunc(c);
828 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
829 
830 		a.full = dfixed_const(1000);
831 		b.full = dfixed_const(mode->clock);
832 		b.full = dfixed_div(b, a);
833 		c.full = dfixed_const(latency_watermark_b);
834 		c.full = dfixed_mul(c, b);
835 		c.full = dfixed_mul(c, radeon_crtc->hsc);
836 		c.full = dfixed_div(c, a);
837 		a.full = dfixed_const(16);
838 		c.full = dfixed_div(c, a);
839 		priority_b_mark = dfixed_trunc(c);
840 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
841 	}
842 
843 	/* select wm A */
844 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
845 	tmp = arb_control3;
846 	tmp &= ~LATENCY_WATERMARK_MASK(3);
847 	tmp |= LATENCY_WATERMARK_MASK(1);
848 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
849 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
850 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
851 		LATENCY_HIGH_WATERMARK(line_time)));
852 	/* select wm B */
853 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
854 	tmp &= ~LATENCY_WATERMARK_MASK(3);
855 	tmp |= LATENCY_WATERMARK_MASK(2);
856 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
857 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
858 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
859 		LATENCY_HIGH_WATERMARK(line_time)));
860 	/* restore original selection */
861 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
862 
863 	/* write the priority marks */
864 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
865 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
866 
867 }
868 
869 void dce6_bandwidth_update(struct radeon_device *rdev)
870 {
871 	struct drm_display_mode *mode0 = NULL;
872 	struct drm_display_mode *mode1 = NULL;
873 	u32 num_heads = 0, lb_size;
874 	int i;
875 
876 	radeon_update_display_priority(rdev);
877 
878 	for (i = 0; i < rdev->num_crtc; i++) {
879 		if (rdev->mode_info.crtcs[i]->base.enabled)
880 			num_heads++;
881 	}
882 	for (i = 0; i < rdev->num_crtc; i += 2) {
883 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
884 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
885 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
886 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
887 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
888 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
889 	}
890 }
891 
892 /*
893  * Core functions
894  */
895 static void si_tiling_mode_table_init(struct radeon_device *rdev)
896 {
897 	const u32 num_tile_mode_states = 32;
898 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
899 
900 	switch (rdev->config.si.mem_row_size_in_kb) {
901 	case 1:
902 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
903 		break;
904 	case 2:
905 	default:
906 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
907 		break;
908 	case 4:
909 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
910 		break;
911 	}
912 
913 	if ((rdev->family == CHIP_TAHITI) ||
914 	    (rdev->family == CHIP_PITCAIRN)) {
915 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
916 			switch (reg_offset) {
917 			case 0:  /* non-AA compressed depth or any compressed stencil */
918 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
919 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
920 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
921 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
922 						 NUM_BANKS(ADDR_SURF_16_BANK) |
923 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
924 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
925 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
926 				break;
927 			case 1:  /* 2xAA/4xAA compressed depth only */
928 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
929 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
930 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
931 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
932 						 NUM_BANKS(ADDR_SURF_16_BANK) |
933 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
934 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
935 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
936 				break;
937 			case 2:  /* 8xAA compressed depth only */
938 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
939 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
940 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
941 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
942 						 NUM_BANKS(ADDR_SURF_16_BANK) |
943 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
944 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
945 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
946 				break;
947 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
948 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
949 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
950 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
951 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
952 						 NUM_BANKS(ADDR_SURF_16_BANK) |
953 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
954 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
955 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
956 				break;
957 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
958 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
959 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
960 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
961 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
962 						 NUM_BANKS(ADDR_SURF_16_BANK) |
963 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
964 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
965 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
966 				break;
967 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
968 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
969 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
970 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
971 						 TILE_SPLIT(split_equal_to_row_size) |
972 						 NUM_BANKS(ADDR_SURF_16_BANK) |
973 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
974 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
975 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
976 				break;
977 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
978 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
979 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
980 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
981 						 TILE_SPLIT(split_equal_to_row_size) |
982 						 NUM_BANKS(ADDR_SURF_16_BANK) |
983 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
984 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
985 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
986 				break;
987 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
988 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
989 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
990 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
991 						 TILE_SPLIT(split_equal_to_row_size) |
992 						 NUM_BANKS(ADDR_SURF_16_BANK) |
993 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
994 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
995 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
996 				break;
997 			case 8:  /* 1D and 1D Array Surfaces */
998 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
999 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1000 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1001 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1002 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1003 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1004 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1005 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1006 				break;
1007 			case 9:  /* Displayable maps. */
1008 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1009 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1010 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1011 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1012 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1013 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1014 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1015 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1016 				break;
1017 			case 10:  /* Display 8bpp. */
1018 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1019 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1020 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1021 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1022 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1023 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1024 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1025 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1026 				break;
1027 			case 11:  /* Display 16bpp. */
1028 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1029 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1030 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1031 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1032 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1033 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1034 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1035 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1036 				break;
1037 			case 12:  /* Display 32bpp. */
1038 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1039 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1040 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1041 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1042 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1043 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1044 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1045 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1046 				break;
1047 			case 13:  /* Thin. */
1048 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1049 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1050 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1051 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1052 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1053 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1054 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1055 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1056 				break;
1057 			case 14:  /* Thin 8 bpp. */
1058 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1059 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1060 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1061 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1062 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1063 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1064 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1065 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1066 				break;
1067 			case 15:  /* Thin 16 bpp. */
1068 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1069 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1070 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1071 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1072 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1073 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1074 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1075 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1076 				break;
1077 			case 16:  /* Thin 32 bpp. */
1078 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1079 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1080 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1081 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1082 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1083 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1084 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1085 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1086 				break;
1087 			case 17:  /* Thin 64 bpp. */
1088 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1089 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1090 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1091 						 TILE_SPLIT(split_equal_to_row_size) |
1092 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1093 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1094 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1095 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1096 				break;
1097 			case 21:  /* 8 bpp PRT. */
1098 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1099 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1100 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1101 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1102 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1103 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1104 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1105 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1106 				break;
1107 			case 22:  /* 16 bpp PRT */
1108 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1109 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1110 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1111 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1112 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1113 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1114 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1115 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1116 				break;
1117 			case 23:  /* 32 bpp PRT */
1118 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1119 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1120 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1121 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1122 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1123 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1124 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1125 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1126 				break;
1127 			case 24:  /* 64 bpp PRT */
1128 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1129 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1130 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1131 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1132 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1133 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1134 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1135 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1136 				break;
1137 			case 25:  /* 128 bpp PRT */
1138 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1139 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1140 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1141 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1142 						 NUM_BANKS(ADDR_SURF_8_BANK) |
1143 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1144 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1145 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1146 				break;
1147 			default:
1148 				gb_tile_moden = 0;
1149 				break;
1150 			}
1151 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1152 		}
1153 	} else if (rdev->family == CHIP_VERDE) {
1154 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1155 			switch (reg_offset) {
1156 			case 0:  /* non-AA compressed depth or any compressed stencil */
1157 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1158 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1159 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1160 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1161 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1162 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1163 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1164 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1165 				break;
1166 			case 1:  /* 2xAA/4xAA compressed depth only */
1167 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1168 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1169 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1170 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1171 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1172 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1173 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1174 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1175 				break;
1176 			case 2:  /* 8xAA compressed depth only */
1177 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1178 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1179 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1180 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1181 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1182 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1183 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1184 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1185 				break;
1186 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
1187 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1188 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1189 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1190 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1191 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1192 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1193 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1194 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1195 				break;
1196 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
1197 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1198 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1199 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1200 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1201 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1202 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1203 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1204 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1205 				break;
1206 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
1207 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1208 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1209 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1210 						 TILE_SPLIT(split_equal_to_row_size) |
1211 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1212 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1213 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1214 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1215 				break;
1216 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
1217 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1218 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1219 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1220 						 TILE_SPLIT(split_equal_to_row_size) |
1221 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1222 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1223 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1224 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1225 				break;
1226 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
1227 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1228 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1229 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1230 						 TILE_SPLIT(split_equal_to_row_size) |
1231 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1232 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1233 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1234 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1235 				break;
1236 			case 8:  /* 1D and 1D Array Surfaces */
1237 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1238 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1239 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1240 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1241 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1242 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1243 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1244 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1245 				break;
1246 			case 9:  /* Displayable maps. */
1247 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1248 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1249 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1250 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1251 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1252 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1253 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1254 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1255 				break;
1256 			case 10:  /* Display 8bpp. */
1257 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1258 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1259 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1260 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1261 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1262 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1263 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1264 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1265 				break;
1266 			case 11:  /* Display 16bpp. */
1267 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1268 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1269 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1270 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1271 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1272 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1273 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1274 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1275 				break;
1276 			case 12:  /* Display 32bpp. */
1277 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1278 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1279 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1280 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1281 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1282 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1283 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1284 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1285 				break;
1286 			case 13:  /* Thin. */
1287 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1288 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1289 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1290 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1291 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1292 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1293 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1294 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1295 				break;
1296 			case 14:  /* Thin 8 bpp. */
1297 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1298 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1299 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1300 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1301 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1302 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1303 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1304 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1305 				break;
1306 			case 15:  /* Thin 16 bpp. */
1307 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1308 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1309 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1310 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1311 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1312 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1313 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1314 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1315 				break;
1316 			case 16:  /* Thin 32 bpp. */
1317 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1318 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1319 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1320 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1321 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1322 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1323 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1324 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1325 				break;
1326 			case 17:  /* Thin 64 bpp. */
1327 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1328 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1329 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1330 						 TILE_SPLIT(split_equal_to_row_size) |
1331 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1332 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1333 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1334 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1335 				break;
1336 			case 21:  /* 8 bpp PRT. */
1337 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1338 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1339 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1340 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1341 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1342 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1343 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1344 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1345 				break;
1346 			case 22:  /* 16 bpp PRT */
1347 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1348 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1349 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1350 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1351 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1352 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1353 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1354 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1355 				break;
1356 			case 23:  /* 32 bpp PRT */
1357 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1358 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1359 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1360 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1361 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1362 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1363 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1364 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1365 				break;
1366 			case 24:  /* 64 bpp PRT */
1367 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1368 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1369 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1370 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1371 						 NUM_BANKS(ADDR_SURF_16_BANK) |
1372 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1373 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1374 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1375 				break;
1376 			case 25:  /* 128 bpp PRT */
1377 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1378 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1379 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1380 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1381 						 NUM_BANKS(ADDR_SURF_8_BANK) |
1382 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1383 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1384 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1385 				break;
1386 			default:
1387 				gb_tile_moden = 0;
1388 				break;
1389 			}
1390 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1391 		}
1392 	} else
1393 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
1394 }
1395 
1396 static void si_select_se_sh(struct radeon_device *rdev,
1397 			    u32 se_num, u32 sh_num)
1398 {
1399 	u32 data = INSTANCE_BROADCAST_WRITES;
1400 
1401 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1402 		data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1403 	else if (se_num == 0xffffffff)
1404 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1405 	else if (sh_num == 0xffffffff)
1406 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1407 	else
1408 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1409 	WREG32(GRBM_GFX_INDEX, data);
1410 }
1411 
1412 static u32 si_create_bitmask(u32 bit_width)
1413 {
1414 	u32 i, mask = 0;
1415 
1416 	for (i = 0; i < bit_width; i++) {
1417 		mask <<= 1;
1418 		mask |= 1;
1419 	}
1420 	return mask;
1421 }
1422 
1423 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
1424 {
1425 	u32 data, mask;
1426 
1427 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
1428 	if (data & 1)
1429 		data &= INACTIVE_CUS_MASK;
1430 	else
1431 		data = 0;
1432 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
1433 
1434 	data >>= INACTIVE_CUS_SHIFT;
1435 
1436 	mask = si_create_bitmask(cu_per_sh);
1437 
1438 	return ~data & mask;
1439 }
1440 
1441 static void si_setup_spi(struct radeon_device *rdev,
1442 			 u32 se_num, u32 sh_per_se,
1443 			 u32 cu_per_sh)
1444 {
1445 	int i, j, k;
1446 	u32 data, mask, active_cu;
1447 
1448 	for (i = 0; i < se_num; i++) {
1449 		for (j = 0; j < sh_per_se; j++) {
1450 			si_select_se_sh(rdev, i, j);
1451 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
1452 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
1453 
1454 			mask = 1;
1455 			for (k = 0; k < 16; k++) {
1456 				mask <<= k;
1457 				if (active_cu & mask) {
1458 					data &= ~mask;
1459 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
1460 					break;
1461 				}
1462 			}
1463 		}
1464 	}
1465 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1466 }
1467 
1468 static u32 si_get_rb_disabled(struct radeon_device *rdev,
1469 			      u32 max_rb_num, u32 se_num,
1470 			      u32 sh_per_se)
1471 {
1472 	u32 data, mask;
1473 
1474 	data = RREG32(CC_RB_BACKEND_DISABLE);
1475 	if (data & 1)
1476 		data &= BACKEND_DISABLE_MASK;
1477 	else
1478 		data = 0;
1479 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1480 
1481 	data >>= BACKEND_DISABLE_SHIFT;
1482 
1483 	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
1484 
1485 	return data & mask;
1486 }
1487 
1488 static void si_setup_rb(struct radeon_device *rdev,
1489 			u32 se_num, u32 sh_per_se,
1490 			u32 max_rb_num)
1491 {
1492 	int i, j;
1493 	u32 data, mask;
1494 	u32 disabled_rbs = 0;
1495 	u32 enabled_rbs = 0;
1496 
1497 	for (i = 0; i < se_num; i++) {
1498 		for (j = 0; j < sh_per_se; j++) {
1499 			si_select_se_sh(rdev, i, j);
1500 			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1501 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
1502 		}
1503 	}
1504 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1505 
1506 	mask = 1;
1507 	for (i = 0; i < max_rb_num; i++) {
1508 		if (!(disabled_rbs & mask))
1509 			enabled_rbs |= mask;
1510 		mask <<= 1;
1511 	}
1512 
1513 	for (i = 0; i < se_num; i++) {
1514 		si_select_se_sh(rdev, i, 0xffffffff);
1515 		data = 0;
1516 		for (j = 0; j < sh_per_se; j++) {
1517 			switch (enabled_rbs & 3) {
1518 			case 1:
1519 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1520 				break;
1521 			case 2:
1522 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1523 				break;
1524 			case 3:
1525 			default:
1526 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1527 				break;
1528 			}
1529 			enabled_rbs >>= 2;
1530 		}
1531 		WREG32(PA_SC_RASTER_CONFIG, data);
1532 	}
1533 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1534 }
1535 
1536 static void si_gpu_init(struct radeon_device *rdev)
1537 {
1538 	u32 gb_addr_config = 0;
1539 	u32 mc_shared_chmap, mc_arb_ramcfg;
1540 	u32 sx_debug_1;
1541 	u32 hdp_host_path_cntl;
1542 	u32 tmp;
1543 	int i, j;
1544 
1545 	switch (rdev->family) {
1546 	case CHIP_TAHITI:
1547 		rdev->config.si.max_shader_engines = 2;
1548 		rdev->config.si.max_tile_pipes = 12;
1549 		rdev->config.si.max_cu_per_sh = 8;
1550 		rdev->config.si.max_sh_per_se = 2;
1551 		rdev->config.si.max_backends_per_se = 4;
1552 		rdev->config.si.max_texture_channel_caches = 12;
1553 		rdev->config.si.max_gprs = 256;
1554 		rdev->config.si.max_gs_threads = 32;
1555 		rdev->config.si.max_hw_contexts = 8;
1556 
1557 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1558 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1559 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1560 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1561 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1562 		break;
1563 	case CHIP_PITCAIRN:
1564 		rdev->config.si.max_shader_engines = 2;
1565 		rdev->config.si.max_tile_pipes = 8;
1566 		rdev->config.si.max_cu_per_sh = 5;
1567 		rdev->config.si.max_sh_per_se = 2;
1568 		rdev->config.si.max_backends_per_se = 4;
1569 		rdev->config.si.max_texture_channel_caches = 8;
1570 		rdev->config.si.max_gprs = 256;
1571 		rdev->config.si.max_gs_threads = 32;
1572 		rdev->config.si.max_hw_contexts = 8;
1573 
1574 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1575 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1576 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1577 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1578 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1579 		break;
1580 	case CHIP_VERDE:
1581 	default:
1582 		rdev->config.si.max_shader_engines = 1;
1583 		rdev->config.si.max_tile_pipes = 4;
1584 		rdev->config.si.max_cu_per_sh = 2;
1585 		rdev->config.si.max_sh_per_se = 2;
1586 		rdev->config.si.max_backends_per_se = 4;
1587 		rdev->config.si.max_texture_channel_caches = 4;
1588 		rdev->config.si.max_gprs = 256;
1589 		rdev->config.si.max_gs_threads = 32;
1590 		rdev->config.si.max_hw_contexts = 8;
1591 
1592 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1593 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
1594 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1595 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1596 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
1597 		break;
1598 	}
1599 
1600 	/* Initialize HDP */
1601 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1602 		WREG32((0x2c14 + j), 0x00000000);
1603 		WREG32((0x2c18 + j), 0x00000000);
1604 		WREG32((0x2c1c + j), 0x00000000);
1605 		WREG32((0x2c20 + j), 0x00000000);
1606 		WREG32((0x2c24 + j), 0x00000000);
1607 	}
1608 
1609 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1610 
1611 	evergreen_fix_pci_max_read_req_size(rdev);
1612 
1613 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1614 
1615 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1616 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1617 
1618 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
1619 	rdev->config.si.mem_max_burst_length_bytes = 256;
1620 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1621 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1622 	if (rdev->config.si.mem_row_size_in_kb > 4)
1623 		rdev->config.si.mem_row_size_in_kb = 4;
1624 	/* XXX use MC settings? */
1625 	rdev->config.si.shader_engine_tile_size = 32;
1626 	rdev->config.si.num_gpus = 1;
1627 	rdev->config.si.multi_gpu_tile_size = 64;
1628 
1629 	/* fix up row size */
1630 	gb_addr_config &= ~ROW_SIZE_MASK;
1631 	switch (rdev->config.si.mem_row_size_in_kb) {
1632 	case 1:
1633 	default:
1634 		gb_addr_config |= ROW_SIZE(0);
1635 		break;
1636 	case 2:
1637 		gb_addr_config |= ROW_SIZE(1);
1638 		break;
1639 	case 4:
1640 		gb_addr_config |= ROW_SIZE(2);
1641 		break;
1642 	}
1643 
1644 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
1645 	 * not have bank info, so create a custom tiling dword.
1646 	 * bits 3:0   num_pipes
1647 	 * bits 7:4   num_banks
1648 	 * bits 11:8  group_size
1649 	 * bits 15:12 row_size
1650 	 */
1651 	rdev->config.si.tile_config = 0;
1652 	switch (rdev->config.si.num_tile_pipes) {
1653 	case 1:
1654 		rdev->config.si.tile_config |= (0 << 0);
1655 		break;
1656 	case 2:
1657 		rdev->config.si.tile_config |= (1 << 0);
1658 		break;
1659 	case 4:
1660 		rdev->config.si.tile_config |= (2 << 0);
1661 		break;
1662 	case 8:
1663 	default:
1664 		/* XXX what about 12? */
1665 		rdev->config.si.tile_config |= (3 << 0);
1666 		break;
1667 	}
1668 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
1669 	case 0: /* four banks */
1670 		rdev->config.si.tile_config |= 0 << 4;
1671 		break;
1672 	case 1: /* eight banks */
1673 		rdev->config.si.tile_config |= 1 << 4;
1674 		break;
1675 	case 2: /* sixteen banks */
1676 	default:
1677 		rdev->config.si.tile_config |= 2 << 4;
1678 		break;
1679 	}
1680 	rdev->config.si.tile_config |=
1681 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1682 	rdev->config.si.tile_config |=
1683 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1684 
1685 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
1686 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1687 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1688 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
1689 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
1690 
1691 	si_tiling_mode_table_init(rdev);
1692 
1693 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
1694 		    rdev->config.si.max_sh_per_se,
1695 		    rdev->config.si.max_backends_per_se);
1696 
1697 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
1698 		     rdev->config.si.max_sh_per_se,
1699 		     rdev->config.si.max_cu_per_sh);
1700 
1701 
1702 	/* set HW defaults for 3D engine */
1703 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
1704 				     ROQ_IB2_START(0x2b)));
1705 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1706 
1707 	sx_debug_1 = RREG32(SX_DEBUG_1);
1708 	WREG32(SX_DEBUG_1, sx_debug_1);
1709 
1710 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1711 
1712 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
1713 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
1714 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
1715 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
1716 
1717 	WREG32(VGT_NUM_INSTANCES, 1);
1718 
1719 	WREG32(CP_PERFMON_CNTL, 0);
1720 
1721 	WREG32(SQ_CONFIG, 0);
1722 
1723 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1724 					  FORCE_EOV_MAX_REZ_CNT(255)));
1725 
1726 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1727 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
1728 
1729 	WREG32(VGT_GS_VERTEX_REUSE, 16);
1730 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1731 
1732 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
1733 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
1734 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
1735 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
1736 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
1737 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
1738 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
1739 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
1740 
1741 	tmp = RREG32(HDP_MISC_CNTL);
1742 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1743 	WREG32(HDP_MISC_CNTL, tmp);
1744 
1745 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1746 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1747 
1748 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1749 
1750 	DRM_UDELAY(50);
1751 }
1752 
1753 /*
1754  * GPU scratch registers helpers function.
1755  */
1756 static void si_scratch_init(struct radeon_device *rdev)
1757 {
1758 	int i;
1759 
1760 	rdev->scratch.num_reg = 7;
1761 	rdev->scratch.reg_base = SCRATCH_REG0;
1762 	for (i = 0; i < rdev->scratch.num_reg; i++) {
1763 		rdev->scratch.free[i] = true;
1764 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1765 	}
1766 }
1767 
1768 void si_fence_ring_emit(struct radeon_device *rdev,
1769 			struct radeon_fence *fence)
1770 {
1771 	struct radeon_ring *ring = &rdev->ring[fence->ring];
1772 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1773 
1774 	/* flush read cache over gart */
1775 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1776 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1777 	radeon_ring_write(ring, 0);
1778 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1779 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
1780 			  PACKET3_TC_ACTION_ENA |
1781 			  PACKET3_SH_KCACHE_ACTION_ENA |
1782 			  PACKET3_SH_ICACHE_ACTION_ENA);
1783 	radeon_ring_write(ring, 0xFFFFFFFF);
1784 	radeon_ring_write(ring, 0);
1785 	radeon_ring_write(ring, 10); /* poll interval */
1786 	/* EVENT_WRITE_EOP - flush caches, send int */
1787 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1788 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
1789 	radeon_ring_write(ring, addr & 0xffffffff);
1790 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
1791 	radeon_ring_write(ring, fence->seq);
1792 	radeon_ring_write(ring, 0);
1793 }
1794 
1795 /*
1796  * IB stuff
1797  */
1798 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1799 {
1800 	struct radeon_ring *ring = &rdev->ring[ib->ring];
1801 	u32 header;
1802 
1803 	if (ib->is_const_ib) {
1804 		/* set switch buffer packet before const IB */
1805 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1806 		radeon_ring_write(ring, 0);
1807 
1808 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1809 	} else {
1810 		u32 next_rptr;
1811 		if (ring->rptr_save_reg) {
1812 			next_rptr = ring->wptr + 3 + 4 + 8;
1813 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1814 			radeon_ring_write(ring, ((ring->rptr_save_reg -
1815 						  PACKET3_SET_CONFIG_REG_START) >> 2));
1816 			radeon_ring_write(ring, next_rptr);
1817 		} else if (rdev->wb.enabled) {
1818 			next_rptr = ring->wptr + 5 + 4 + 8;
1819 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1820 			radeon_ring_write(ring, (1 << 8));
1821 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1822 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1823 			radeon_ring_write(ring, next_rptr);
1824 		}
1825 
1826 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1827 	}
1828 
1829 	radeon_ring_write(ring, header);
1830 	radeon_ring_write(ring,
1831 #ifdef __BIG_ENDIAN
1832 			  (2 << 0) |
1833 #endif
1834 			  (ib->gpu_addr & 0xFFFFFFFC));
1835 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1836 	radeon_ring_write(ring, ib->length_dw |
1837 			  (ib->vm ? (ib->vm->id << 24) : 0));
1838 
1839 	if (!ib->is_const_ib) {
1840 		/* flush read cache over gart for this vmid */
1841 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1842 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1843 		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
1844 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1845 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
1846 				  PACKET3_TC_ACTION_ENA |
1847 				  PACKET3_SH_KCACHE_ACTION_ENA |
1848 				  PACKET3_SH_ICACHE_ACTION_ENA);
1849 		radeon_ring_write(ring, 0xFFFFFFFF);
1850 		radeon_ring_write(ring, 0);
1851 		radeon_ring_write(ring, 10); /* poll interval */
1852 	}
1853 }
1854 
1855 /*
1856  * CP.
1857  */
1858 static void si_cp_enable(struct radeon_device *rdev, bool enable)
1859 {
1860 	if (enable)
1861 		WREG32(CP_ME_CNTL, 0);
1862 	else {
1863 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1864 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1865 		WREG32(SCRATCH_UMSK, 0);
1866 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1867 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
1868 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
1869 	}
1870 	DRM_UDELAY(50);
1871 }
1872 
1873 static int si_cp_load_microcode(struct radeon_device *rdev)
1874 {
1875 	const __be32 *fw_data;
1876 	int i;
1877 
1878 	if (!rdev->me_fw || !rdev->pfp_fw)
1879 		return -EINVAL;
1880 
1881 	si_cp_enable(rdev, false);
1882 
1883 	/* PFP */
1884 	fw_data = (const __be32 *)rdev->pfp_fw->data;
1885 	WREG32(CP_PFP_UCODE_ADDR, 0);
1886 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
1887 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1888 	WREG32(CP_PFP_UCODE_ADDR, 0);
1889 
1890 	/* CE */
1891 	fw_data = (const __be32 *)rdev->ce_fw->data;
1892 	WREG32(CP_CE_UCODE_ADDR, 0);
1893 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
1894 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1895 	WREG32(CP_CE_UCODE_ADDR, 0);
1896 
1897 	/* ME */
1898 	fw_data = (const __be32 *)rdev->me_fw->data;
1899 	WREG32(CP_ME_RAM_WADDR, 0);
1900 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
1901 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1902 	WREG32(CP_ME_RAM_WADDR, 0);
1903 
1904 	WREG32(CP_PFP_UCODE_ADDR, 0);
1905 	WREG32(CP_CE_UCODE_ADDR, 0);
1906 	WREG32(CP_ME_RAM_WADDR, 0);
1907 	WREG32(CP_ME_RAM_RADDR, 0);
1908 	return 0;
1909 }
1910 
1911 static int si_cp_start(struct radeon_device *rdev)
1912 {
1913 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1914 	int r, i;
1915 
1916 	r = radeon_ring_lock(rdev, ring, 7 + 4);
1917 	if (r) {
1918 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1919 		return r;
1920 	}
1921 	/* init the CP */
1922 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
1923 	radeon_ring_write(ring, 0x1);
1924 	radeon_ring_write(ring, 0x0);
1925 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
1926 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
1927 	radeon_ring_write(ring, 0);
1928 	radeon_ring_write(ring, 0);
1929 
1930 	/* init the CE partitions */
1931 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1932 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1933 	radeon_ring_write(ring, 0xc000);
1934 	radeon_ring_write(ring, 0xe000);
1935 	radeon_ring_unlock_commit(rdev, ring);
1936 
1937 	si_cp_enable(rdev, true);
1938 
1939 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
1940 	if (r) {
1941 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1942 		return r;
1943 	}
1944 
1945 	/* setup clear context state */
1946 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1947 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1948 
1949 	for (i = 0; i < si_default_size; i++)
1950 		radeon_ring_write(ring, si_default_state[i]);
1951 
1952 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1953 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1954 
1955 	/* set clear context state */
1956 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1957 	radeon_ring_write(ring, 0);
1958 
1959 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1960 	radeon_ring_write(ring, 0x00000316);
1961 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1962 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
1963 
1964 	radeon_ring_unlock_commit(rdev, ring);
1965 
1966 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
1967 		ring = &rdev->ring[i];
1968 		r = radeon_ring_lock(rdev, ring, 2);
1969 
1970 		/* clear the compute context state */
1971 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
1972 		radeon_ring_write(ring, 0);
1973 
1974 		radeon_ring_unlock_commit(rdev, ring);
1975 	}
1976 
1977 	return 0;
1978 }
1979 
1980 static void si_cp_fini(struct radeon_device *rdev)
1981 {
1982 	struct radeon_ring *ring;
1983 	si_cp_enable(rdev, false);
1984 
1985 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1986 	radeon_ring_fini(rdev, ring);
1987 	radeon_scratch_free(rdev, ring->rptr_save_reg);
1988 
1989 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
1990 	radeon_ring_fini(rdev, ring);
1991 	radeon_scratch_free(rdev, ring->rptr_save_reg);
1992 
1993 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
1994 	radeon_ring_fini(rdev, ring);
1995 	radeon_scratch_free(rdev, ring->rptr_save_reg);
1996 }
1997 
1998 static int si_cp_resume(struct radeon_device *rdev)
1999 {
2000 	struct radeon_ring *ring;
2001 	u32 tmp;
2002 	u32 rb_bufsz;
2003 	int r;
2004 
2005 	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
2006 	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
2007 				 SOFT_RESET_PA |
2008 				 SOFT_RESET_VGT |
2009 				 SOFT_RESET_SPI |
2010 				 SOFT_RESET_SX));
2011 	RREG32(GRBM_SOFT_RESET);
2012 	DRM_MDELAY(15);
2013 	WREG32(GRBM_SOFT_RESET, 0);
2014 	RREG32(GRBM_SOFT_RESET);
2015 
2016 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
2017 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2018 
2019 	/* Set the write pointer delay */
2020 	WREG32(CP_RB_WPTR_DELAY, 0);
2021 
2022 	WREG32(CP_DEBUG, 0);
2023 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2024 
2025 	/* ring 0 - compute and gfx */
2026 	/* Set ring buffer size */
2027 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2028 	rb_bufsz = drm_order(ring->ring_size / 8);
2029 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2030 #ifdef __BIG_ENDIAN
2031 	tmp |= BUF_SWAP_32BIT;
2032 #endif
2033 	WREG32(CP_RB0_CNTL, tmp);
2034 
2035 	/* Initialize the ring buffer's read and write pointers */
2036 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2037 	ring->wptr = 0;
2038 	WREG32(CP_RB0_WPTR, ring->wptr);
2039 
2040 	/* set the wb address whether it's enabled or not */
2041 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2042 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2043 
2044 	if (rdev->wb.enabled)
2045 		WREG32(SCRATCH_UMSK, 0xff);
2046 	else {
2047 		tmp |= RB_NO_UPDATE;
2048 		WREG32(SCRATCH_UMSK, 0);
2049 	}
2050 
2051 	DRM_MDELAY(1);
2052 	WREG32(CP_RB0_CNTL, tmp);
2053 
2054 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
2055 
2056 	ring->rptr = RREG32(CP_RB0_RPTR);
2057 
2058 	/* ring1  - compute only */
2059 	/* Set ring buffer size */
2060 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
2061 	rb_bufsz = drm_order(ring->ring_size / 8);
2062 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2063 #ifdef __BIG_ENDIAN
2064 	tmp |= BUF_SWAP_32BIT;
2065 #endif
2066 	WREG32(CP_RB1_CNTL, tmp);
2067 
2068 	/* Initialize the ring buffer's read and write pointers */
2069 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
2070 	ring->wptr = 0;
2071 	WREG32(CP_RB1_WPTR, ring->wptr);
2072 
2073 	/* set the wb address whether it's enabled or not */
2074 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
2075 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
2076 
2077 	DRM_MDELAY(1);
2078 	WREG32(CP_RB1_CNTL, tmp);
2079 
2080 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
2081 
2082 	ring->rptr = RREG32(CP_RB1_RPTR);
2083 
2084 	/* ring2 - compute only */
2085 	/* Set ring buffer size */
2086 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2087 	rb_bufsz = drm_order(ring->ring_size / 8);
2088 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2089 #ifdef __BIG_ENDIAN
2090 	tmp |= BUF_SWAP_32BIT;
2091 #endif
2092 	WREG32(CP_RB2_CNTL, tmp);
2093 
2094 	/* Initialize the ring buffer's read and write pointers */
2095 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
2096 	ring->wptr = 0;
2097 	WREG32(CP_RB2_WPTR, ring->wptr);
2098 
2099 	/* set the wb address whether it's enabled or not */
2100 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
2101 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
2102 
2103 	DRM_MDELAY(1);
2104 	WREG32(CP_RB2_CNTL, tmp);
2105 
2106 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
2107 
2108 	ring->rptr = RREG32(CP_RB2_RPTR);
2109 
2110 	/* start the rings */
2111 	si_cp_start(rdev);
2112 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2113 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
2114 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
2115 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2116 	if (r) {
2117 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2118 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2119 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2120 		return r;
2121 	}
2122 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
2123 	if (r) {
2124 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2125 	}
2126 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
2127 	if (r) {
2128 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2129 	}
2130 
2131 	return 0;
2132 }
2133 
2134 bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2135 {
2136 	u32 srbm_status;
2137 	u32 grbm_status, grbm_status2;
2138 	u32 grbm_status_se0, grbm_status_se1;
2139 
2140 	srbm_status = RREG32(SRBM_STATUS);
2141 	grbm_status = RREG32(GRBM_STATUS);
2142 	grbm_status2 = RREG32(GRBM_STATUS2);
2143 	grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
2144 	grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
2145 	if (!(grbm_status & GUI_ACTIVE)) {
2146 		radeon_ring_lockup_update(ring);
2147 		return false;
2148 	}
2149 	/* force CP activities */
2150 	radeon_ring_force_activity(rdev, ring);
2151 	return radeon_ring_test_lockup(rdev, ring);
2152 }
2153 
2154 static void si_gpu_soft_reset_gfx(struct radeon_device *rdev)
2155 {
2156 	u32 grbm_reset = 0;
2157 
2158 	if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
2159 		return;
2160 
2161 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2162 		RREG32(GRBM_STATUS));
2163 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2164 		RREG32(GRBM_STATUS2));
2165 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2166 		RREG32(GRBM_STATUS_SE0));
2167 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2168 		RREG32(GRBM_STATUS_SE1));
2169 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2170 		RREG32(SRBM_STATUS));
2171 
2172 	/* Disable CP parsing/prefetching */
2173 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
2174 
2175 	/* reset all the gfx blocks */
2176 	grbm_reset = (SOFT_RESET_CP |
2177 		      SOFT_RESET_CB |
2178 		      SOFT_RESET_DB |
2179 		      SOFT_RESET_GDS |
2180 		      SOFT_RESET_PA |
2181 		      SOFT_RESET_SC |
2182 		      SOFT_RESET_BCI |
2183 		      SOFT_RESET_SPI |
2184 		      SOFT_RESET_SX |
2185 		      SOFT_RESET_TC |
2186 		      SOFT_RESET_TA |
2187 		      SOFT_RESET_VGT |
2188 		      SOFT_RESET_IA);
2189 
2190 	dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
2191 	WREG32(GRBM_SOFT_RESET, grbm_reset);
2192 	(void)RREG32(GRBM_SOFT_RESET);
2193 	DRM_UDELAY(50);
2194 	WREG32(GRBM_SOFT_RESET, 0);
2195 	(void)RREG32(GRBM_SOFT_RESET);
2196 
2197 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2198 		RREG32(GRBM_STATUS));
2199 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2200 		RREG32(GRBM_STATUS2));
2201 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2202 		RREG32(GRBM_STATUS_SE0));
2203 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2204 		RREG32(GRBM_STATUS_SE1));
2205 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2206 		RREG32(SRBM_STATUS));
2207 }
2208 
2209 static void si_gpu_soft_reset_dma(struct radeon_device *rdev)
2210 {
2211 	u32 tmp;
2212 
2213 	if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
2214 		return;
2215 
2216 	dev_info(rdev->dev, "  DMA_STATUS_REG   = 0x%08X\n",
2217 		RREG32(DMA_STATUS_REG));
2218 
2219 	/* dma0 */
2220 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
2221 	tmp &= ~DMA_RB_ENABLE;
2222 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
2223 
2224 	/* dma1 */
2225 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
2226 	tmp &= ~DMA_RB_ENABLE;
2227 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
2228 
2229 	/* Reset dma */
2230 	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
2231 	RREG32(SRBM_SOFT_RESET);
2232 	DRM_UDELAY(50);
2233 	WREG32(SRBM_SOFT_RESET, 0);
2234 
2235 	dev_info(rdev->dev, "  DMA_STATUS_REG   = 0x%08X\n",
2236 		RREG32(DMA_STATUS_REG));
2237 }
2238 
2239 static int si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
2240 {
2241 	struct evergreen_mc_save save;
2242 
2243 	if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
2244 		reset_mask &= ~(RADEON_RESET_GFX | RADEON_RESET_COMPUTE);
2245 
2246 	if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
2247 		reset_mask &= ~RADEON_RESET_DMA;
2248 
2249 	if (reset_mask == 0)
2250 		return 0;
2251 
2252 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
2253 
2254 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
2255 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
2256 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
2257 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
2258 
2259 	evergreen_mc_stop(rdev, &save);
2260 	if (radeon_mc_wait_for_idle(rdev)) {
2261 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2262 	}
2263 
2264 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE))
2265 		si_gpu_soft_reset_gfx(rdev);
2266 
2267 	if (reset_mask & RADEON_RESET_DMA)
2268 		si_gpu_soft_reset_dma(rdev);
2269 
2270 	/* Wait a little for things to settle down */
2271 	DRM_UDELAY(50);
2272 
2273 	evergreen_mc_resume(rdev, &save);
2274 	return 0;
2275 }
2276 
2277 int si_asic_reset(struct radeon_device *rdev)
2278 {
2279 	return si_gpu_soft_reset(rdev, (RADEON_RESET_GFX |
2280 					RADEON_RESET_COMPUTE |
2281 					RADEON_RESET_DMA));
2282 }
2283 
2284 /* MC */
2285 static void si_mc_program(struct radeon_device *rdev)
2286 {
2287 	struct evergreen_mc_save save;
2288 	u32 tmp;
2289 	int i, j;
2290 
2291 	/* Initialize HDP */
2292 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2293 		WREG32((0x2c14 + j), 0x00000000);
2294 		WREG32((0x2c18 + j), 0x00000000);
2295 		WREG32((0x2c1c + j), 0x00000000);
2296 		WREG32((0x2c20 + j), 0x00000000);
2297 		WREG32((0x2c24 + j), 0x00000000);
2298 	}
2299 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
2300 
2301 	evergreen_mc_stop(rdev, &save);
2302 	if (radeon_mc_wait_for_idle(rdev)) {
2303 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2304 	}
2305 	/* Lockout access through VGA aperture*/
2306 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
2307 	/* Update configuration */
2308 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
2309 	       rdev->mc.vram_start >> 12);
2310 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
2311 	       rdev->mc.vram_end >> 12);
2312 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
2313 	       rdev->vram_scratch.gpu_addr >> 12);
2314 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
2315 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
2316 	WREG32(MC_VM_FB_LOCATION, tmp);
2317 	/* XXX double check these! */
2318 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
2319 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
2320 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
2321 	WREG32(MC_VM_AGP_BASE, 0);
2322 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
2323 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
2324 	if (radeon_mc_wait_for_idle(rdev)) {
2325 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2326 	}
2327 	evergreen_mc_resume(rdev, &save);
2328 	/* we need to own VRAM, so turn off the VGA renderer here
2329 	 * to stop it overwriting our objects */
2330 	rv515_vga_render_disable(rdev);
2331 }
2332 
2333 /* SI MC address space is 40 bits */
2334 static void si_vram_location(struct radeon_device *rdev,
2335 			     struct radeon_mc *mc, u64 base)
2336 {
2337 	mc->vram_start = base;
2338 	if (mc->mc_vram_size > (0xFFFFFFFFFFULL - base + 1)) {
2339 		dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
2340 		mc->real_vram_size = mc->aper_size;
2341 		mc->mc_vram_size = mc->aper_size;
2342 	}
2343 	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
2344 	dev_info(rdev->dev, "VRAM: %juM 0x%016jX - 0x%016jX (%juM used)\n",
2345 			(uintmax_t)mc->mc_vram_size >> 20, (uintmax_t)mc->vram_start,
2346 			(uintmax_t)mc->vram_end, (uintmax_t)mc->real_vram_size >> 20);
2347 }
2348 
2349 static void si_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
2350 {
2351 	u64 size_af, size_bf;
2352 
2353 	size_af = ((0xFFFFFFFFFFULL - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
2354 	size_bf = mc->vram_start & ~mc->gtt_base_align;
2355 	if (size_bf > size_af) {
2356 		if (mc->gtt_size > size_bf) {
2357 			dev_warn(rdev->dev, "limiting GTT\n");
2358 			mc->gtt_size = size_bf;
2359 		}
2360 		mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
2361 	} else {
2362 		if (mc->gtt_size > size_af) {
2363 			dev_warn(rdev->dev, "limiting GTT\n");
2364 			mc->gtt_size = size_af;
2365 		}
2366 		mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
2367 	}
2368 	mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
2369 	dev_info(rdev->dev, "GTT: %juM 0x%016jX - 0x%016jX\n",
2370 			(uintmax_t)mc->gtt_size >> 20, (uintmax_t)mc->gtt_start, (uintmax_t)mc->gtt_end);
2371 }
2372 
2373 static void si_vram_gtt_location(struct radeon_device *rdev,
2374 				 struct radeon_mc *mc)
2375 {
2376 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
2377 		/* leave room for at least 1024M GTT */
2378 		dev_warn(rdev->dev, "limiting VRAM\n");
2379 		mc->real_vram_size = 0xFFC0000000ULL;
2380 		mc->mc_vram_size = 0xFFC0000000ULL;
2381 	}
2382 	si_vram_location(rdev, &rdev->mc, 0);
2383 	rdev->mc.gtt_base_align = 0;
2384 	si_gtt_location(rdev, mc);
2385 }
2386 
2387 static int si_mc_init(struct radeon_device *rdev)
2388 {
2389 	u32 tmp;
2390 	int chansize, numchan;
2391 
2392 	/* Get VRAM informations */
2393 	rdev->mc.vram_is_ddr = true;
2394 	tmp = RREG32(MC_ARB_RAMCFG);
2395 	if (tmp & CHANSIZE_OVERRIDE) {
2396 		chansize = 16;
2397 	} else if (tmp & CHANSIZE_MASK) {
2398 		chansize = 64;
2399 	} else {
2400 		chansize = 32;
2401 	}
2402 	tmp = RREG32(MC_SHARED_CHMAP);
2403 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2404 	case 0:
2405 	default:
2406 		numchan = 1;
2407 		break;
2408 	case 1:
2409 		numchan = 2;
2410 		break;
2411 	case 2:
2412 		numchan = 4;
2413 		break;
2414 	case 3:
2415 		numchan = 8;
2416 		break;
2417 	case 4:
2418 		numchan = 3;
2419 		break;
2420 	case 5:
2421 		numchan = 6;
2422 		break;
2423 	case 6:
2424 		numchan = 10;
2425 		break;
2426 	case 7:
2427 		numchan = 12;
2428 		break;
2429 	case 8:
2430 		numchan = 16;
2431 		break;
2432 	}
2433 	rdev->mc.vram_width = numchan * chansize;
2434 	/* Could aper size report 0 ? */
2435 	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
2436 	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
2437 	/* size in MB on si */
2438 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2439 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2440 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
2441 	si_vram_gtt_location(rdev, &rdev->mc);
2442 	radeon_update_bandwidth_info(rdev);
2443 
2444 	return 0;
2445 }
2446 
2447 /*
2448  * GART
2449  */
2450 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
2451 {
2452 	/* flush hdp cache */
2453 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2454 
2455 	/* bits 0-15 are the VM contexts0-15 */
2456 	WREG32(VM_INVALIDATE_REQUEST, 1);
2457 }
2458 
2459 static int si_pcie_gart_enable(struct radeon_device *rdev)
2460 {
2461 	int r, i;
2462 
2463 	if (rdev->gart.robj == NULL) {
2464 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
2465 		return -EINVAL;
2466 	}
2467 	r = radeon_gart_table_vram_pin(rdev);
2468 	if (r)
2469 		return r;
2470 	radeon_gart_restore(rdev);
2471 	/* Setup TLB control */
2472 	WREG32(MC_VM_MX_L1_TLB_CNTL,
2473 	       (0xA << 7) |
2474 	       ENABLE_L1_TLB |
2475 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2476 	       ENABLE_ADVANCED_DRIVER_MODEL |
2477 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2478 	/* Setup L2 cache */
2479 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
2480 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2481 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2482 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
2483 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
2484 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
2485 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2486 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
2487 	/* setup context0 */
2488 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
2489 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
2490 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
2491 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
2492 			(u32)(rdev->dummy_page.addr >> 12));
2493 	WREG32(VM_CONTEXT0_CNTL2, 0);
2494 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
2495 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
2496 
2497 	WREG32(0x15D4, 0);
2498 	WREG32(0x15D8, 0);
2499 	WREG32(0x15DC, 0);
2500 
2501 	/* empty context1-15 */
2502 	/* set vm size, must be a multiple of 4 */
2503 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
2504 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
2505 	/* Assign the pt base to something valid for now; the pts used for
2506 	 * the VMs are determined by the application and setup and assigned
2507 	 * on the fly in the vm part of radeon_gart.c
2508 	 */
2509 	for (i = 1; i < 16; i++) {
2510 		if (i < 8)
2511 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
2512 			       rdev->gart.table_addr >> 12);
2513 		else
2514 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
2515 			       rdev->gart.table_addr >> 12);
2516 	}
2517 
2518 	/* enable context1-15 */
2519 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
2520 	       (u32)(rdev->dummy_page.addr >> 12));
2521 	WREG32(VM_CONTEXT1_CNTL2, 4);
2522 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
2523 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2524 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
2525 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2526 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
2527 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
2528 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
2529 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
2530 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
2531 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
2532 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
2533 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2534 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
2535 
2536 	si_pcie_gart_tlb_flush(rdev);
2537 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
2538 		 (unsigned)(rdev->mc.gtt_size >> 20),
2539 		 (unsigned long long)rdev->gart.table_addr);
2540 	rdev->gart.ready = true;
2541 	return 0;
2542 }
2543 
2544 static void si_pcie_gart_disable(struct radeon_device *rdev)
2545 {
2546 	/* Disable all tables */
2547 	WREG32(VM_CONTEXT0_CNTL, 0);
2548 	WREG32(VM_CONTEXT1_CNTL, 0);
2549 	/* Setup TLB control */
2550 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2551 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2552 	/* Setup L2 cache */
2553 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2554 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2555 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
2556 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
2557 	WREG32(VM_L2_CNTL2, 0);
2558 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2559 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
2560 	radeon_gart_table_vram_unpin(rdev);
2561 }
2562 
2563 static void si_pcie_gart_fini(struct radeon_device *rdev)
2564 {
2565 	si_pcie_gart_disable(rdev);
2566 	radeon_gart_table_vram_free(rdev);
2567 	radeon_gart_fini(rdev);
2568 }
2569 
2570 /* vm parser */
2571 static bool si_vm_reg_valid(u32 reg)
2572 {
2573 	/* context regs are fine */
2574 	if (reg >= 0x28000)
2575 		return true;
2576 
2577 	/* check config regs */
2578 	switch (reg) {
2579 	case GRBM_GFX_INDEX:
2580 	case CP_STRMOUT_CNTL:
2581 	case VGT_VTX_VECT_EJECT_REG:
2582 	case VGT_CACHE_INVALIDATION:
2583 	case VGT_ESGS_RING_SIZE:
2584 	case VGT_GSVS_RING_SIZE:
2585 	case VGT_GS_VERTEX_REUSE:
2586 	case VGT_PRIMITIVE_TYPE:
2587 	case VGT_INDEX_TYPE:
2588 	case VGT_NUM_INDICES:
2589 	case VGT_NUM_INSTANCES:
2590 	case VGT_TF_RING_SIZE:
2591 	case VGT_HS_OFFCHIP_PARAM:
2592 	case VGT_TF_MEMORY_BASE:
2593 	case PA_CL_ENHANCE:
2594 	case PA_SU_LINE_STIPPLE_VALUE:
2595 	case PA_SC_LINE_STIPPLE_STATE:
2596 	case PA_SC_ENHANCE:
2597 	case SQC_CACHES:
2598 	case SPI_STATIC_THREAD_MGMT_1:
2599 	case SPI_STATIC_THREAD_MGMT_2:
2600 	case SPI_STATIC_THREAD_MGMT_3:
2601 	case SPI_PS_MAX_WAVE_ID:
2602 	case SPI_CONFIG_CNTL:
2603 	case SPI_CONFIG_CNTL_1:
2604 	case TA_CNTL_AUX:
2605 		return true;
2606 	default:
2607 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
2608 		return false;
2609 	}
2610 }
2611 
2612 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
2613 				  u32 *ib, struct radeon_cs_packet *pkt)
2614 {
2615 	switch (pkt->opcode) {
2616 	case PACKET3_NOP:
2617 	case PACKET3_SET_BASE:
2618 	case PACKET3_SET_CE_DE_COUNTERS:
2619 	case PACKET3_LOAD_CONST_RAM:
2620 	case PACKET3_WRITE_CONST_RAM:
2621 	case PACKET3_WRITE_CONST_RAM_OFFSET:
2622 	case PACKET3_DUMP_CONST_RAM:
2623 	case PACKET3_INCREMENT_CE_COUNTER:
2624 	case PACKET3_WAIT_ON_DE_COUNTER:
2625 	case PACKET3_CE_WRITE:
2626 		break;
2627 	default:
2628 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
2629 		return -EINVAL;
2630 	}
2631 	return 0;
2632 }
2633 
2634 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
2635 				   u32 *ib, struct radeon_cs_packet *pkt)
2636 {
2637 	u32 idx = pkt->idx + 1;
2638 	u32 idx_value = ib[idx];
2639 	u32 start_reg, end_reg, reg, i;
2640 	u32 command, info;
2641 
2642 	switch (pkt->opcode) {
2643 	case PACKET3_NOP:
2644 	case PACKET3_SET_BASE:
2645 	case PACKET3_CLEAR_STATE:
2646 	case PACKET3_INDEX_BUFFER_SIZE:
2647 	case PACKET3_DISPATCH_DIRECT:
2648 	case PACKET3_DISPATCH_INDIRECT:
2649 	case PACKET3_ALLOC_GDS:
2650 	case PACKET3_WRITE_GDS_RAM:
2651 	case PACKET3_ATOMIC_GDS:
2652 	case PACKET3_ATOMIC:
2653 	case PACKET3_OCCLUSION_QUERY:
2654 	case PACKET3_SET_PREDICATION:
2655 	case PACKET3_COND_EXEC:
2656 	case PACKET3_PRED_EXEC:
2657 	case PACKET3_DRAW_INDIRECT:
2658 	case PACKET3_DRAW_INDEX_INDIRECT:
2659 	case PACKET3_INDEX_BASE:
2660 	case PACKET3_DRAW_INDEX_2:
2661 	case PACKET3_CONTEXT_CONTROL:
2662 	case PACKET3_INDEX_TYPE:
2663 	case PACKET3_DRAW_INDIRECT_MULTI:
2664 	case PACKET3_DRAW_INDEX_AUTO:
2665 	case PACKET3_DRAW_INDEX_IMMD:
2666 	case PACKET3_NUM_INSTANCES:
2667 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
2668 	case PACKET3_STRMOUT_BUFFER_UPDATE:
2669 	case PACKET3_DRAW_INDEX_OFFSET_2:
2670 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
2671 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
2672 	case PACKET3_MPEG_INDEX:
2673 	case PACKET3_WAIT_REG_MEM:
2674 	case PACKET3_MEM_WRITE:
2675 	case PACKET3_PFP_SYNC_ME:
2676 	case PACKET3_SURFACE_SYNC:
2677 	case PACKET3_EVENT_WRITE:
2678 	case PACKET3_EVENT_WRITE_EOP:
2679 	case PACKET3_EVENT_WRITE_EOS:
2680 	case PACKET3_SET_CONTEXT_REG:
2681 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
2682 	case PACKET3_SET_SH_REG:
2683 	case PACKET3_SET_SH_REG_OFFSET:
2684 	case PACKET3_INCREMENT_DE_COUNTER:
2685 	case PACKET3_WAIT_ON_CE_COUNTER:
2686 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
2687 	case PACKET3_ME_WRITE:
2688 		break;
2689 	case PACKET3_COPY_DATA:
2690 		if ((idx_value & 0xf00) == 0) {
2691 			reg = ib[idx + 3] * 4;
2692 			if (!si_vm_reg_valid(reg))
2693 				return -EINVAL;
2694 		}
2695 		break;
2696 	case PACKET3_WRITE_DATA:
2697 		if ((idx_value & 0xf00) == 0) {
2698 			start_reg = ib[idx + 1] * 4;
2699 			if (idx_value & 0x10000) {
2700 				if (!si_vm_reg_valid(start_reg))
2701 					return -EINVAL;
2702 			} else {
2703 				for (i = 0; i < (pkt->count - 2); i++) {
2704 					reg = start_reg + (4 * i);
2705 					if (!si_vm_reg_valid(reg))
2706 						return -EINVAL;
2707 				}
2708 			}
2709 		}
2710 		break;
2711 	case PACKET3_COND_WRITE:
2712 		if (idx_value & 0x100) {
2713 			reg = ib[idx + 5] * 4;
2714 			if (!si_vm_reg_valid(reg))
2715 				return -EINVAL;
2716 		}
2717 		break;
2718 	case PACKET3_COPY_DW:
2719 		if (idx_value & 0x2) {
2720 			reg = ib[idx + 3] * 4;
2721 			if (!si_vm_reg_valid(reg))
2722 				return -EINVAL;
2723 		}
2724 		break;
2725 	case PACKET3_SET_CONFIG_REG:
2726 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2727 		end_reg = 4 * pkt->count + start_reg - 4;
2728 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2729 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2730 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2731 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2732 			return -EINVAL;
2733 		}
2734 		for (i = 0; i < pkt->count; i++) {
2735 			reg = start_reg + (4 * i);
2736 			if (!si_vm_reg_valid(reg))
2737 				return -EINVAL;
2738 		}
2739 		break;
2740 	case PACKET3_CP_DMA:
2741 		command = ib[idx + 4];
2742 		info = ib[idx + 1];
2743 		if (command & PACKET3_CP_DMA_CMD_SAS) {
2744 			/* src address space is register */
2745 			if (((info & 0x60000000) >> 29) == 0) {
2746 				start_reg = idx_value << 2;
2747 				if (command & PACKET3_CP_DMA_CMD_SAIC) {
2748 					reg = start_reg;
2749 					if (!si_vm_reg_valid(reg)) {
2750 						DRM_ERROR("CP DMA Bad SRC register\n");
2751 						return -EINVAL;
2752 					}
2753 				} else {
2754 					for (i = 0; i < (command & 0x1fffff); i++) {
2755 						reg = start_reg + (4 * i);
2756 						if (!si_vm_reg_valid(reg)) {
2757 							DRM_ERROR("CP DMA Bad SRC register\n");
2758 							return -EINVAL;
2759 						}
2760 					}
2761 				}
2762 			}
2763 		}
2764 		if (command & PACKET3_CP_DMA_CMD_DAS) {
2765 			/* dst address space is register */
2766 			if (((info & 0x00300000) >> 20) == 0) {
2767 				start_reg = ib[idx + 2];
2768 				if (command & PACKET3_CP_DMA_CMD_DAIC) {
2769 					reg = start_reg;
2770 					if (!si_vm_reg_valid(reg)) {
2771 						DRM_ERROR("CP DMA Bad DST register\n");
2772 						return -EINVAL;
2773 					}
2774 				} else {
2775 					for (i = 0; i < (command & 0x1fffff); i++) {
2776 						reg = start_reg + (4 * i);
2777 						if (!si_vm_reg_valid(reg)) {
2778 							DRM_ERROR("CP DMA Bad DST register\n");
2779 							return -EINVAL;
2780 						}
2781 					}
2782 				}
2783 			}
2784 		}
2785 		break;
2786 	default:
2787 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
2788 		return -EINVAL;
2789 	}
2790 	return 0;
2791 }
2792 
2793 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
2794 				       u32 *ib, struct radeon_cs_packet *pkt)
2795 {
2796 	u32 idx = pkt->idx + 1;
2797 	u32 idx_value = ib[idx];
2798 	u32 start_reg, reg, i;
2799 
2800 	switch (pkt->opcode) {
2801 	case PACKET3_NOP:
2802 	case PACKET3_SET_BASE:
2803 	case PACKET3_CLEAR_STATE:
2804 	case PACKET3_DISPATCH_DIRECT:
2805 	case PACKET3_DISPATCH_INDIRECT:
2806 	case PACKET3_ALLOC_GDS:
2807 	case PACKET3_WRITE_GDS_RAM:
2808 	case PACKET3_ATOMIC_GDS:
2809 	case PACKET3_ATOMIC:
2810 	case PACKET3_OCCLUSION_QUERY:
2811 	case PACKET3_SET_PREDICATION:
2812 	case PACKET3_COND_EXEC:
2813 	case PACKET3_PRED_EXEC:
2814 	case PACKET3_CONTEXT_CONTROL:
2815 	case PACKET3_STRMOUT_BUFFER_UPDATE:
2816 	case PACKET3_WAIT_REG_MEM:
2817 	case PACKET3_MEM_WRITE:
2818 	case PACKET3_PFP_SYNC_ME:
2819 	case PACKET3_SURFACE_SYNC:
2820 	case PACKET3_EVENT_WRITE:
2821 	case PACKET3_EVENT_WRITE_EOP:
2822 	case PACKET3_EVENT_WRITE_EOS:
2823 	case PACKET3_SET_CONTEXT_REG:
2824 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
2825 	case PACKET3_SET_SH_REG:
2826 	case PACKET3_SET_SH_REG_OFFSET:
2827 	case PACKET3_INCREMENT_DE_COUNTER:
2828 	case PACKET3_WAIT_ON_CE_COUNTER:
2829 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
2830 	case PACKET3_ME_WRITE:
2831 		break;
2832 	case PACKET3_COPY_DATA:
2833 		if ((idx_value & 0xf00) == 0) {
2834 			reg = ib[idx + 3] * 4;
2835 			if (!si_vm_reg_valid(reg))
2836 				return -EINVAL;
2837 		}
2838 		break;
2839 	case PACKET3_WRITE_DATA:
2840 		if ((idx_value & 0xf00) == 0) {
2841 			start_reg = ib[idx + 1] * 4;
2842 			if (idx_value & 0x10000) {
2843 				if (!si_vm_reg_valid(start_reg))
2844 					return -EINVAL;
2845 			} else {
2846 				for (i = 0; i < (pkt->count - 2); i++) {
2847 					reg = start_reg + (4 * i);
2848 					if (!si_vm_reg_valid(reg))
2849 						return -EINVAL;
2850 				}
2851 			}
2852 		}
2853 		break;
2854 	case PACKET3_COND_WRITE:
2855 		if (idx_value & 0x100) {
2856 			reg = ib[idx + 5] * 4;
2857 			if (!si_vm_reg_valid(reg))
2858 				return -EINVAL;
2859 		}
2860 		break;
2861 	case PACKET3_COPY_DW:
2862 		if (idx_value & 0x2) {
2863 			reg = ib[idx + 3] * 4;
2864 			if (!si_vm_reg_valid(reg))
2865 				return -EINVAL;
2866 		}
2867 		break;
2868 	default:
2869 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
2870 		return -EINVAL;
2871 	}
2872 	return 0;
2873 }
2874 
2875 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
2876 {
2877 	int ret = 0;
2878 	u32 idx = 0;
2879 	struct radeon_cs_packet pkt;
2880 
2881 	do {
2882 		pkt.idx = idx;
2883 		pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
2884 		pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
2885 		pkt.one_reg_wr = 0;
2886 		switch (pkt.type) {
2887 		case PACKET_TYPE0:
2888 			dev_err(rdev->dev, "Packet0 not allowed!\n");
2889 			ret = -EINVAL;
2890 			break;
2891 		case PACKET_TYPE2:
2892 			idx += 1;
2893 			break;
2894 		case PACKET_TYPE3:
2895 			pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
2896 			if (ib->is_const_ib)
2897 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
2898 			else {
2899 				switch (ib->ring) {
2900 				case RADEON_RING_TYPE_GFX_INDEX:
2901 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
2902 					break;
2903 				case CAYMAN_RING_TYPE_CP1_INDEX:
2904 				case CAYMAN_RING_TYPE_CP2_INDEX:
2905 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
2906 					break;
2907 				default:
2908 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
2909 					ret = -EINVAL;
2910 					break;
2911 				}
2912 			}
2913 			idx += pkt.count + 2;
2914 			break;
2915 		default:
2916 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
2917 			ret = -EINVAL;
2918 			break;
2919 		}
2920 		if (ret)
2921 			break;
2922 	} while (idx < ib->length_dw);
2923 
2924 	return ret;
2925 }
2926 
2927 /*
2928  * vm
2929  */
2930 int si_vm_init(struct radeon_device *rdev)
2931 {
2932 	/* number of VMs */
2933 	rdev->vm_manager.nvm = 16;
2934 	/* base offset of vram pages */
2935 	rdev->vm_manager.vram_base_offset = 0;
2936 
2937 	return 0;
2938 }
2939 
2940 void si_vm_fini(struct radeon_device *rdev)
2941 {
2942 }
2943 
2944 /**
2945  * si_vm_set_page - update the page tables using the CP
2946  *
2947  * @rdev: radeon_device pointer
2948  * @pe: addr of the page entry
2949  * @addr: dst addr to write into pe
2950  * @count: number of page entries to update
2951  * @incr: increase next addr by incr bytes
2952  * @flags: access flags
2953  *
2954  * Update the page tables using the CP (cayman-si).
2955  */
2956 void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
2957 		    uint64_t addr, unsigned count,
2958 		    uint32_t incr, uint32_t flags)
2959 {
2960 	struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
2961 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
2962 	uint64_t value;
2963 	unsigned ndw;
2964 
2965 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
2966 		while (count) {
2967 			ndw = 2 + count * 2;
2968 			if (ndw > 0x3FFE)
2969 				ndw = 0x3FFE;
2970 
2971 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
2972 			radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2973 						 WRITE_DATA_DST_SEL(1)));
2974 			radeon_ring_write(ring, pe);
2975 			radeon_ring_write(ring, upper_32_bits(pe));
2976 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
2977 				if (flags & RADEON_VM_PAGE_SYSTEM) {
2978 					value = radeon_vm_map_gart(rdev, addr);
2979 					value &= 0xFFFFFFFFFFFFF000ULL;
2980 				} else if (flags & RADEON_VM_PAGE_VALID) {
2981 					value = addr;
2982 				} else {
2983 					value = 0;
2984 				}
2985 				addr += incr;
2986 				value |= r600_flags;
2987 				radeon_ring_write(ring, value);
2988 				radeon_ring_write(ring, upper_32_bits(value));
2989 			}
2990 		}
2991 	} else {
2992 		/* DMA */
2993 		if (flags & RADEON_VM_PAGE_SYSTEM) {
2994 			while (count) {
2995 				ndw = count * 2;
2996 				if (ndw > 0xFFFFE)
2997 					ndw = 0xFFFFE;
2998 
2999 				/* for non-physically contiguous pages (system) */
3000 				radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw));
3001 				radeon_ring_write(ring, pe);
3002 				radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
3003 				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
3004 					if (flags & RADEON_VM_PAGE_SYSTEM) {
3005 						value = radeon_vm_map_gart(rdev, addr);
3006 						value &= 0xFFFFFFFFFFFFF000ULL;
3007 					} else if (flags & RADEON_VM_PAGE_VALID) {
3008 						value = addr;
3009 					} else {
3010 						value = 0;
3011 					}
3012 					addr += incr;
3013 					value |= r600_flags;
3014 					radeon_ring_write(ring, value);
3015 					radeon_ring_write(ring, upper_32_bits(value));
3016 				}
3017 			}
3018 		} else {
3019 			while (count) {
3020 				ndw = count * 2;
3021 				if (ndw > 0xFFFFE)
3022 					ndw = 0xFFFFE;
3023 
3024 				if (flags & RADEON_VM_PAGE_VALID)
3025 					value = addr;
3026 				else
3027 					value = 0;
3028 				/* for physically contiguous pages (vram) */
3029 				radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw));
3030 				radeon_ring_write(ring, pe); /* dst addr */
3031 				radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
3032 				radeon_ring_write(ring, r600_flags); /* mask */
3033 				radeon_ring_write(ring, 0);
3034 				radeon_ring_write(ring, value); /* value */
3035 				radeon_ring_write(ring, upper_32_bits(value));
3036 				radeon_ring_write(ring, incr); /* increment size */
3037 				radeon_ring_write(ring, 0);
3038 				pe += ndw * 4;
3039 				addr += (ndw / 2) * incr;
3040 				count -= ndw / 2;
3041 			}
3042 		}
3043 	}
3044 }
3045 
3046 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3047 {
3048 	struct radeon_ring *ring = &rdev->ring[ridx];
3049 
3050 	if (vm == NULL)
3051 		return;
3052 
3053 	/* write new base address */
3054 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3055 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3056 				 WRITE_DATA_DST_SEL(0)));
3057 
3058 	if (vm->id < 8) {
3059 		radeon_ring_write(ring,
3060 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3061 	} else {
3062 		radeon_ring_write(ring,
3063 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3064 	}
3065 	radeon_ring_write(ring, 0);
3066 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3067 
3068 	/* flush hdp cache */
3069 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3070 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3071 				 WRITE_DATA_DST_SEL(0)));
3072 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3073 	radeon_ring_write(ring, 0);
3074 	radeon_ring_write(ring, 0x1);
3075 
3076 	/* bits 0-15 are the VM contexts0-15 */
3077 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3078 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3079 				 WRITE_DATA_DST_SEL(0)));
3080 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3081 	radeon_ring_write(ring, 0);
3082 	radeon_ring_write(ring, 1 << vm->id);
3083 
3084 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
3085 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3086 	radeon_ring_write(ring, 0x0);
3087 }
3088 
3089 void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3090 {
3091 	struct radeon_ring *ring = &rdev->ring[ridx];
3092 
3093 	if (vm == NULL)
3094 		return;
3095 
3096 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
3097 	if (vm->id < 8) {
3098 		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
3099 	} else {
3100 		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
3101 	}
3102 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3103 
3104 	/* flush hdp cache */
3105 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
3106 	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
3107 	radeon_ring_write(ring, 1);
3108 
3109 	/* bits 0-7 are the VM contexts0-7 */
3110 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
3111 	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
3112 	radeon_ring_write(ring, 1 << vm->id);
3113 }
3114 
3115 /*
3116  * RLC
3117  */
3118 void si_rlc_fini(struct radeon_device *rdev)
3119 {
3120 	int r;
3121 
3122 	/* save restore block */
3123 	if (rdev->rlc.save_restore_obj) {
3124 		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
3125 		if (unlikely(r != 0))
3126 			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
3127 		radeon_bo_unpin(rdev->rlc.save_restore_obj);
3128 		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
3129 
3130 		radeon_bo_unref(&rdev->rlc.save_restore_obj);
3131 		rdev->rlc.save_restore_obj = NULL;
3132 	}
3133 
3134 	/* clear state block */
3135 	if (rdev->rlc.clear_state_obj) {
3136 		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
3137 		if (unlikely(r != 0))
3138 			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
3139 		radeon_bo_unpin(rdev->rlc.clear_state_obj);
3140 		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
3141 
3142 		radeon_bo_unref(&rdev->rlc.clear_state_obj);
3143 		rdev->rlc.clear_state_obj = NULL;
3144 	}
3145 }
3146 
3147 int si_rlc_init(struct radeon_device *rdev)
3148 {
3149 	int r;
3150 
3151 	/* save restore block */
3152 	if (rdev->rlc.save_restore_obj == NULL) {
3153 		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
3154 				     RADEON_GEM_DOMAIN_VRAM, NULL,
3155 				     &rdev->rlc.save_restore_obj);
3156 		if (r) {
3157 			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
3158 			return r;
3159 		}
3160 	}
3161 
3162 	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
3163 	if (unlikely(r != 0)) {
3164 		si_rlc_fini(rdev);
3165 		return r;
3166 	}
3167 	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
3168 			  &rdev->rlc.save_restore_gpu_addr);
3169 	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
3170 	if (r) {
3171 		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
3172 		si_rlc_fini(rdev);
3173 		return r;
3174 	}
3175 
3176 	/* clear state block */
3177 	if (rdev->rlc.clear_state_obj == NULL) {
3178 		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
3179 				     RADEON_GEM_DOMAIN_VRAM, NULL,
3180 				     &rdev->rlc.clear_state_obj);
3181 		if (r) {
3182 			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
3183 			si_rlc_fini(rdev);
3184 			return r;
3185 		}
3186 	}
3187 	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
3188 	if (unlikely(r != 0)) {
3189 		si_rlc_fini(rdev);
3190 		return r;
3191 	}
3192 	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
3193 			  &rdev->rlc.clear_state_gpu_addr);
3194 	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
3195 	if (r) {
3196 		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
3197 		si_rlc_fini(rdev);
3198 		return r;
3199 	}
3200 
3201 	return 0;
3202 }
3203 
3204 static void si_rlc_stop(struct radeon_device *rdev)
3205 {
3206 	WREG32(RLC_CNTL, 0);
3207 }
3208 
3209 static void si_rlc_start(struct radeon_device *rdev)
3210 {
3211 	WREG32(RLC_CNTL, RLC_ENABLE);
3212 }
3213 
3214 static int si_rlc_resume(struct radeon_device *rdev)
3215 {
3216 	u32 i;
3217 	const __be32 *fw_data;
3218 
3219 	if (!rdev->rlc_fw)
3220 		return -EINVAL;
3221 
3222 	si_rlc_stop(rdev);
3223 
3224 	WREG32(RLC_RL_BASE, 0);
3225 	WREG32(RLC_RL_SIZE, 0);
3226 	WREG32(RLC_LB_CNTL, 0);
3227 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
3228 	WREG32(RLC_LB_CNTR_INIT, 0);
3229 
3230 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
3231 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
3232 
3233 	WREG32(RLC_MC_CNTL, 0);
3234 	WREG32(RLC_UCODE_CNTL, 0);
3235 
3236 	fw_data = (const __be32 *)rdev->rlc_fw->data;
3237 	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
3238 		WREG32(RLC_UCODE_ADDR, i);
3239 		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
3240 	}
3241 	WREG32(RLC_UCODE_ADDR, 0);
3242 
3243 	si_rlc_start(rdev);
3244 
3245 	return 0;
3246 }
3247 
3248 static void si_enable_interrupts(struct radeon_device *rdev)
3249 {
3250 	u32 ih_cntl = RREG32(IH_CNTL);
3251 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3252 
3253 	ih_cntl |= ENABLE_INTR;
3254 	ih_rb_cntl |= IH_RB_ENABLE;
3255 	WREG32(IH_CNTL, ih_cntl);
3256 	WREG32(IH_RB_CNTL, ih_rb_cntl);
3257 	rdev->ih.enabled = true;
3258 }
3259 
3260 static void si_disable_interrupts(struct radeon_device *rdev)
3261 {
3262 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3263 	u32 ih_cntl = RREG32(IH_CNTL);
3264 
3265 	ih_rb_cntl &= ~IH_RB_ENABLE;
3266 	ih_cntl &= ~ENABLE_INTR;
3267 	WREG32(IH_RB_CNTL, ih_rb_cntl);
3268 	WREG32(IH_CNTL, ih_cntl);
3269 	/* set rptr, wptr to 0 */
3270 	WREG32(IH_RB_RPTR, 0);
3271 	WREG32(IH_RB_WPTR, 0);
3272 	rdev->ih.enabled = false;
3273 	rdev->ih.rptr = 0;
3274 }
3275 
3276 static void si_disable_interrupt_state(struct radeon_device *rdev)
3277 {
3278 	u32 tmp;
3279 
3280 	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3281 	WREG32(CP_INT_CNTL_RING1, 0);
3282 	WREG32(CP_INT_CNTL_RING2, 0);
3283 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3284 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
3285 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3286 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
3287 	WREG32(GRBM_INT_CNTL, 0);
3288 	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3289 	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3290 	if (rdev->num_crtc >= 4) {
3291 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3292 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3293 	}
3294 	if (rdev->num_crtc >= 6) {
3295 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3296 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3297 	}
3298 
3299 	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3300 	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3301 	if (rdev->num_crtc >= 4) {
3302 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3303 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3304 	}
3305 	if (rdev->num_crtc >= 6) {
3306 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3307 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3308 	}
3309 
3310 	WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
3311 
3312 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3313 	WREG32(DC_HPD1_INT_CONTROL, tmp);
3314 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3315 	WREG32(DC_HPD2_INT_CONTROL, tmp);
3316 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3317 	WREG32(DC_HPD3_INT_CONTROL, tmp);
3318 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3319 	WREG32(DC_HPD4_INT_CONTROL, tmp);
3320 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3321 	WREG32(DC_HPD5_INT_CONTROL, tmp);
3322 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3323 	WREG32(DC_HPD6_INT_CONTROL, tmp);
3324 
3325 }
3326 
3327 static int si_irq_init(struct radeon_device *rdev)
3328 {
3329 	int ret = 0;
3330 	int rb_bufsz;
3331 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
3332 
3333 	/* allocate ring */
3334 	ret = r600_ih_ring_alloc(rdev);
3335 	if (ret)
3336 		return ret;
3337 
3338 	/* disable irqs */
3339 	si_disable_interrupts(rdev);
3340 
3341 	/* init rlc */
3342 	ret = si_rlc_resume(rdev);
3343 	if (ret) {
3344 		r600_ih_ring_fini(rdev);
3345 		return ret;
3346 	}
3347 
3348 	/* setup interrupt control */
3349 	/* set dummy read address to ring address */
3350 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
3351 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
3352 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
3353 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
3354 	 */
3355 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
3356 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
3357 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
3358 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
3359 
3360 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
3361 	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
3362 
3363 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
3364 		      IH_WPTR_OVERFLOW_CLEAR |
3365 		      (rb_bufsz << 1));
3366 
3367 	if (rdev->wb.enabled)
3368 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
3369 
3370 	/* set the writeback address whether it's enabled or not */
3371 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
3372 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
3373 
3374 	WREG32(IH_RB_CNTL, ih_rb_cntl);
3375 
3376 	/* set rptr, wptr to 0 */
3377 	WREG32(IH_RB_RPTR, 0);
3378 	WREG32(IH_RB_WPTR, 0);
3379 
3380 	/* Default settings for IH_CNTL (disabled at first) */
3381 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
3382 	/* RPTR_REARM only works if msi's are enabled */
3383 	if (rdev->msi_enabled)
3384 		ih_cntl |= RPTR_REARM;
3385 	WREG32(IH_CNTL, ih_cntl);
3386 
3387 	/* force the active interrupt state to all disabled */
3388 	si_disable_interrupt_state(rdev);
3389 
3390 	pci_enable_busmaster(rdev->dev);
3391 
3392 	/* enable irqs */
3393 	si_enable_interrupts(rdev);
3394 
3395 	return ret;
3396 }
3397 
3398 int si_irq_set(struct radeon_device *rdev)
3399 {
3400 	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
3401 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
3402 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
3403 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
3404 	u32 grbm_int_cntl = 0;
3405 	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
3406 	u32 dma_cntl, dma_cntl1;
3407 
3408 	if (!rdev->irq.installed) {
3409 		DRM_ERROR("Can't enable IRQ/MSI because no handler is installed\n");
3410 		return -EINVAL;
3411 	}
3412 	/* don't enable anything if the ih is disabled */
3413 	if (!rdev->ih.enabled) {
3414 		si_disable_interrupts(rdev);
3415 		/* force the active interrupt state to all disabled */
3416 		si_disable_interrupt_state(rdev);
3417 		return 0;
3418 	}
3419 
3420 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
3421 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
3422 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
3423 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
3424 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
3425 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
3426 
3427 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3428 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3429 
3430 	/* enable CP interrupts on all rings */
3431 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
3432 		DRM_DEBUG("si_irq_set: sw int gfx\n");
3433 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
3434 	}
3435 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
3436 		DRM_DEBUG("si_irq_set: sw int cp1\n");
3437 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
3438 	}
3439 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
3440 		DRM_DEBUG("si_irq_set: sw int cp2\n");
3441 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
3442 	}
3443 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
3444 		DRM_DEBUG("si_irq_set: sw int dma\n");
3445 		dma_cntl |= TRAP_ENABLE;
3446 	}
3447 
3448 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
3449 		DRM_DEBUG("si_irq_set: sw int dma1\n");
3450 		dma_cntl1 |= TRAP_ENABLE;
3451 	}
3452 	if (rdev->irq.crtc_vblank_int[0] ||
3453 	    atomic_read(&rdev->irq.pflip[0])) {
3454 		DRM_DEBUG("si_irq_set: vblank 0\n");
3455 		crtc1 |= VBLANK_INT_MASK;
3456 	}
3457 	if (rdev->irq.crtc_vblank_int[1] ||
3458 	    atomic_read(&rdev->irq.pflip[1])) {
3459 		DRM_DEBUG("si_irq_set: vblank 1\n");
3460 		crtc2 |= VBLANK_INT_MASK;
3461 	}
3462 	if (rdev->irq.crtc_vblank_int[2] ||
3463 	    atomic_read(&rdev->irq.pflip[2])) {
3464 		DRM_DEBUG("si_irq_set: vblank 2\n");
3465 		crtc3 |= VBLANK_INT_MASK;
3466 	}
3467 	if (rdev->irq.crtc_vblank_int[3] ||
3468 	    atomic_read(&rdev->irq.pflip[3])) {
3469 		DRM_DEBUG("si_irq_set: vblank 3\n");
3470 		crtc4 |= VBLANK_INT_MASK;
3471 	}
3472 	if (rdev->irq.crtc_vblank_int[4] ||
3473 	    atomic_read(&rdev->irq.pflip[4])) {
3474 		DRM_DEBUG("si_irq_set: vblank 4\n");
3475 		crtc5 |= VBLANK_INT_MASK;
3476 	}
3477 	if (rdev->irq.crtc_vblank_int[5] ||
3478 	    atomic_read(&rdev->irq.pflip[5])) {
3479 		DRM_DEBUG("si_irq_set: vblank 5\n");
3480 		crtc6 |= VBLANK_INT_MASK;
3481 	}
3482 	if (rdev->irq.hpd[0]) {
3483 		DRM_DEBUG("si_irq_set: hpd 1\n");
3484 		hpd1 |= DC_HPDx_INT_EN;
3485 	}
3486 	if (rdev->irq.hpd[1]) {
3487 		DRM_DEBUG("si_irq_set: hpd 2\n");
3488 		hpd2 |= DC_HPDx_INT_EN;
3489 	}
3490 	if (rdev->irq.hpd[2]) {
3491 		DRM_DEBUG("si_irq_set: hpd 3\n");
3492 		hpd3 |= DC_HPDx_INT_EN;
3493 	}
3494 	if (rdev->irq.hpd[3]) {
3495 		DRM_DEBUG("si_irq_set: hpd 4\n");
3496 		hpd4 |= DC_HPDx_INT_EN;
3497 	}
3498 	if (rdev->irq.hpd[4]) {
3499 		DRM_DEBUG("si_irq_set: hpd 5\n");
3500 		hpd5 |= DC_HPDx_INT_EN;
3501 	}
3502 	if (rdev->irq.hpd[5]) {
3503 		DRM_DEBUG("si_irq_set: hpd 6\n");
3504 		hpd6 |= DC_HPDx_INT_EN;
3505 	}
3506 
3507 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
3508 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
3509 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
3510 
3511 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
3512 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
3513 
3514 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
3515 
3516 	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
3517 	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
3518 	if (rdev->num_crtc >= 4) {
3519 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
3520 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
3521 	}
3522 	if (rdev->num_crtc >= 6) {
3523 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
3524 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
3525 	}
3526 
3527 	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
3528 	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
3529 	if (rdev->num_crtc >= 4) {
3530 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
3531 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
3532 	}
3533 	if (rdev->num_crtc >= 6) {
3534 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
3535 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
3536 	}
3537 
3538 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
3539 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
3540 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
3541 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
3542 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
3543 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
3544 
3545 	return 0;
3546 }
3547 
3548 static inline void si_irq_ack(struct radeon_device *rdev)
3549 {
3550 	u32 tmp;
3551 
3552 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
3553 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
3554 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
3555 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
3556 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
3557 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
3558 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
3559 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
3560 	if (rdev->num_crtc >= 4) {
3561 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
3562 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
3563 	}
3564 	if (rdev->num_crtc >= 6) {
3565 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
3566 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
3567 	}
3568 
3569 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
3570 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3571 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
3572 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3573 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
3574 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
3575 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
3576 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
3577 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
3578 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
3579 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
3580 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
3581 
3582 	if (rdev->num_crtc >= 4) {
3583 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
3584 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3585 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
3586 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3587 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
3588 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
3589 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
3590 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
3591 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
3592 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
3593 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
3594 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
3595 	}
3596 
3597 	if (rdev->num_crtc >= 6) {
3598 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
3599 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3600 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
3601 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3602 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
3603 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
3604 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
3605 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
3606 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
3607 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
3608 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
3609 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
3610 	}
3611 
3612 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
3613 		tmp = RREG32(DC_HPD1_INT_CONTROL);
3614 		tmp |= DC_HPDx_INT_ACK;
3615 		WREG32(DC_HPD1_INT_CONTROL, tmp);
3616 	}
3617 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
3618 		tmp = RREG32(DC_HPD2_INT_CONTROL);
3619 		tmp |= DC_HPDx_INT_ACK;
3620 		WREG32(DC_HPD2_INT_CONTROL, tmp);
3621 	}
3622 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3623 		tmp = RREG32(DC_HPD3_INT_CONTROL);
3624 		tmp |= DC_HPDx_INT_ACK;
3625 		WREG32(DC_HPD3_INT_CONTROL, tmp);
3626 	}
3627 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3628 		tmp = RREG32(DC_HPD4_INT_CONTROL);
3629 		tmp |= DC_HPDx_INT_ACK;
3630 		WREG32(DC_HPD4_INT_CONTROL, tmp);
3631 	}
3632 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3633 		tmp = RREG32(DC_HPD5_INT_CONTROL);
3634 		tmp |= DC_HPDx_INT_ACK;
3635 		WREG32(DC_HPD5_INT_CONTROL, tmp);
3636 	}
3637 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3638 		tmp = RREG32(DC_HPD5_INT_CONTROL);
3639 		tmp |= DC_HPDx_INT_ACK;
3640 		WREG32(DC_HPD6_INT_CONTROL, tmp);
3641 	}
3642 }
3643 
3644 static void si_irq_disable(struct radeon_device *rdev)
3645 {
3646 	si_disable_interrupts(rdev);
3647 	/* Wait and acknowledge irq */
3648 	DRM_MDELAY(1);
3649 	si_irq_ack(rdev);
3650 	si_disable_interrupt_state(rdev);
3651 }
3652 
3653 static void si_irq_suspend(struct radeon_device *rdev)
3654 {
3655 	si_irq_disable(rdev);
3656 	si_rlc_stop(rdev);
3657 }
3658 
3659 static void si_irq_fini(struct radeon_device *rdev)
3660 {
3661 	si_irq_suspend(rdev);
3662 	r600_ih_ring_fini(rdev);
3663 }
3664 
3665 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
3666 {
3667 	u32 wptr, tmp;
3668 
3669 	if (rdev->wb.enabled)
3670 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
3671 	else
3672 		wptr = RREG32(IH_RB_WPTR);
3673 
3674 	if (wptr & RB_OVERFLOW) {
3675 		/* When a ring buffer overflow happen start parsing interrupt
3676 		 * from the last not overwritten vector (wptr + 16). Hopefully
3677 		 * this should allow us to catchup.
3678 		 */
3679 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
3680 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
3681 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
3682 		tmp = RREG32(IH_RB_CNTL);
3683 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
3684 		WREG32(IH_RB_CNTL, tmp);
3685 	}
3686 	return (wptr & rdev->ih.ptr_mask);
3687 }
3688 
3689 /*        SI IV Ring
3690  * Each IV ring entry is 128 bits:
3691  * [7:0]    - interrupt source id
3692  * [31:8]   - reserved
3693  * [59:32]  - interrupt source data
3694  * [63:60]  - reserved
3695  * [71:64]  - RINGID
3696  * [79:72]  - VMID
3697  * [127:80] - reserved
3698  */
3699 irqreturn_t si_irq_process(struct radeon_device *rdev)
3700 {
3701 	u32 wptr;
3702 	u32 rptr;
3703 	u32 src_id, src_data, ring_id;
3704 	u32 ring_index;
3705 	bool queue_hotplug = false;
3706 
3707 	if (!rdev->ih.enabled || rdev->shutdown)
3708 		return IRQ_NONE;
3709 
3710 	wptr = si_get_ih_wptr(rdev);
3711 
3712 restart_ih:
3713 	/* is somebody else already processing irqs? */
3714 	if (atomic_xchg(&rdev->ih.lock, 1))
3715 		return IRQ_NONE;
3716 
3717 	rptr = rdev->ih.rptr;
3718 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
3719 
3720 	/* Order reading of wptr vs. reading of IH ring data */
3721 	cpu_lfence();
3722 
3723 	/* display interrupts */
3724 	si_irq_ack(rdev);
3725 
3726 	while (rptr != wptr) {
3727 		/* wptr/rptr are in bytes! */
3728 		ring_index = rptr / 4;
3729 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
3730 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
3731 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
3732 
3733 		switch (src_id) {
3734 		case 1: /* D1 vblank/vline */
3735 			switch (src_data) {
3736 			case 0: /* D1 vblank */
3737 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
3738 					if (rdev->irq.crtc_vblank_int[0]) {
3739 						drm_handle_vblank(rdev->ddev, 0);
3740 						rdev->pm.vblank_sync = true;
3741 						DRM_WAKEUP(&rdev->irq.vblank_queue);
3742 					}
3743 					if (atomic_read(&rdev->irq.pflip[0]))
3744 						radeon_crtc_handle_flip(rdev, 0);
3745 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
3746 					DRM_DEBUG("IH: D1 vblank\n");
3747 				}
3748 				break;
3749 			case 1: /* D1 vline */
3750 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
3751 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
3752 					DRM_DEBUG("IH: D1 vline\n");
3753 				}
3754 				break;
3755 			default:
3756 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3757 				break;
3758 			}
3759 			break;
3760 		case 2: /* D2 vblank/vline */
3761 			switch (src_data) {
3762 			case 0: /* D2 vblank */
3763 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
3764 					if (rdev->irq.crtc_vblank_int[1]) {
3765 						drm_handle_vblank(rdev->ddev, 1);
3766 						rdev->pm.vblank_sync = true;
3767 						DRM_WAKEUP(&rdev->irq.vblank_queue);
3768 					}
3769 					if (atomic_read(&rdev->irq.pflip[1]))
3770 						radeon_crtc_handle_flip(rdev, 1);
3771 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
3772 					DRM_DEBUG("IH: D2 vblank\n");
3773 				}
3774 				break;
3775 			case 1: /* D2 vline */
3776 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
3777 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
3778 					DRM_DEBUG("IH: D2 vline\n");
3779 				}
3780 				break;
3781 			default:
3782 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3783 				break;
3784 			}
3785 			break;
3786 		case 3: /* D3 vblank/vline */
3787 			switch (src_data) {
3788 			case 0: /* D3 vblank */
3789 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
3790 					if (rdev->irq.crtc_vblank_int[2]) {
3791 						drm_handle_vblank(rdev->ddev, 2);
3792 						rdev->pm.vblank_sync = true;
3793 						DRM_WAKEUP(&rdev->irq.vblank_queue);
3794 					}
3795 					if (atomic_read(&rdev->irq.pflip[2]))
3796 						radeon_crtc_handle_flip(rdev, 2);
3797 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
3798 					DRM_DEBUG("IH: D3 vblank\n");
3799 				}
3800 				break;
3801 			case 1: /* D3 vline */
3802 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
3803 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
3804 					DRM_DEBUG("IH: D3 vline\n");
3805 				}
3806 				break;
3807 			default:
3808 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3809 				break;
3810 			}
3811 			break;
3812 		case 4: /* D4 vblank/vline */
3813 			switch (src_data) {
3814 			case 0: /* D4 vblank */
3815 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
3816 					if (rdev->irq.crtc_vblank_int[3]) {
3817 						drm_handle_vblank(rdev->ddev, 3);
3818 						rdev->pm.vblank_sync = true;
3819 						DRM_WAKEUP(&rdev->irq.vblank_queue);
3820 					}
3821 					if (atomic_read(&rdev->irq.pflip[3]))
3822 						radeon_crtc_handle_flip(rdev, 3);
3823 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
3824 					DRM_DEBUG("IH: D4 vblank\n");
3825 				}
3826 				break;
3827 			case 1: /* D4 vline */
3828 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
3829 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
3830 					DRM_DEBUG("IH: D4 vline\n");
3831 				}
3832 				break;
3833 			default:
3834 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3835 				break;
3836 			}
3837 			break;
3838 		case 5: /* D5 vblank/vline */
3839 			switch (src_data) {
3840 			case 0: /* D5 vblank */
3841 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
3842 					if (rdev->irq.crtc_vblank_int[4]) {
3843 						drm_handle_vblank(rdev->ddev, 4);
3844 						rdev->pm.vblank_sync = true;
3845 						DRM_WAKEUP(&rdev->irq.vblank_queue);
3846 					}
3847 					if (atomic_read(&rdev->irq.pflip[4]))
3848 						radeon_crtc_handle_flip(rdev, 4);
3849 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
3850 					DRM_DEBUG("IH: D5 vblank\n");
3851 				}
3852 				break;
3853 			case 1: /* D5 vline */
3854 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
3855 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
3856 					DRM_DEBUG("IH: D5 vline\n");
3857 				}
3858 				break;
3859 			default:
3860 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3861 				break;
3862 			}
3863 			break;
3864 		case 6: /* D6 vblank/vline */
3865 			switch (src_data) {
3866 			case 0: /* D6 vblank */
3867 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
3868 					if (rdev->irq.crtc_vblank_int[5]) {
3869 						drm_handle_vblank(rdev->ddev, 5);
3870 						rdev->pm.vblank_sync = true;
3871 						DRM_WAKEUP(&rdev->irq.vblank_queue);
3872 					}
3873 					if (atomic_read(&rdev->irq.pflip[5]))
3874 						radeon_crtc_handle_flip(rdev, 5);
3875 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
3876 					DRM_DEBUG("IH: D6 vblank\n");
3877 				}
3878 				break;
3879 			case 1: /* D6 vline */
3880 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
3881 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
3882 					DRM_DEBUG("IH: D6 vline\n");
3883 				}
3884 				break;
3885 			default:
3886 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3887 				break;
3888 			}
3889 			break;
3890 		case 42: /* HPD hotplug */
3891 			switch (src_data) {
3892 			case 0:
3893 				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
3894 					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
3895 					queue_hotplug = true;
3896 					DRM_DEBUG("IH: HPD1\n");
3897 				}
3898 				break;
3899 			case 1:
3900 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
3901 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
3902 					queue_hotplug = true;
3903 					DRM_DEBUG("IH: HPD2\n");
3904 				}
3905 				break;
3906 			case 2:
3907 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3908 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
3909 					queue_hotplug = true;
3910 					DRM_DEBUG("IH: HPD3\n");
3911 				}
3912 				break;
3913 			case 3:
3914 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3915 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
3916 					queue_hotplug = true;
3917 					DRM_DEBUG("IH: HPD4\n");
3918 				}
3919 				break;
3920 			case 4:
3921 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3922 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
3923 					queue_hotplug = true;
3924 					DRM_DEBUG("IH: HPD5\n");
3925 				}
3926 				break;
3927 			case 5:
3928 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3929 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
3930 					queue_hotplug = true;
3931 					DRM_DEBUG("IH: HPD6\n");
3932 				}
3933 				break;
3934 			default:
3935 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3936 				break;
3937 			}
3938 			break;
3939 		case 146:
3940 		case 147:
3941 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
3942 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3943 				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3944 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3945 				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3946 			/* reset addr and status */
3947 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
3948 			break;
3949 		case 176: /* RINGID0 CP_INT */
3950 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3951 			break;
3952 		case 177: /* RINGID1 CP_INT */
3953 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
3954 			break;
3955 		case 178: /* RINGID2 CP_INT */
3956 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
3957 			break;
3958 		case 181: /* CP EOP event */
3959 			DRM_DEBUG("IH: CP EOP\n");
3960 			switch (ring_id) {
3961 			case 0:
3962 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3963 				break;
3964 			case 1:
3965 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
3966 				break;
3967 			case 2:
3968 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
3969 				break;
3970 			}
3971 			break;
3972 		case 224: /* DMA trap event */
3973 			DRM_DEBUG("IH: DMA trap\n");
3974 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
3975 			break;
3976 		case 233: /* GUI IDLE */
3977 			DRM_DEBUG("IH: GUI idle\n");
3978 			break;
3979 		case 244: /* DMA trap event */
3980 			DRM_DEBUG("IH: DMA1 trap\n");
3981 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
3982 			break;
3983 		default:
3984 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3985 			break;
3986 		}
3987 
3988 		/* wptr/rptr are in bytes! */
3989 		rptr += 16;
3990 		rptr &= rdev->ih.ptr_mask;
3991 	}
3992 	if (queue_hotplug)
3993 		taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
3994 	rdev->ih.rptr = rptr;
3995 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
3996 	atomic_set(&rdev->ih.lock, 0);
3997 
3998 	/* make sure wptr hasn't changed while processing */
3999 	wptr = si_get_ih_wptr(rdev);
4000 	if (wptr != rptr)
4001 		goto restart_ih;
4002 
4003 	return IRQ_HANDLED;
4004 }
4005 
4006 /**
4007  * si_copy_dma - copy pages using the DMA engine
4008  *
4009  * @rdev: radeon_device pointer
4010  * @src_offset: src GPU address
4011  * @dst_offset: dst GPU address
4012  * @num_gpu_pages: number of GPU pages to xfer
4013  * @fence: radeon fence object
4014  *
4015  * Copy GPU paging using the DMA engine (SI).
4016  * Used by the radeon ttm implementation to move pages if
4017  * registered as the asic copy callback.
4018  */
4019 int si_copy_dma(struct radeon_device *rdev,
4020 		uint64_t src_offset, uint64_t dst_offset,
4021 		unsigned num_gpu_pages,
4022 		struct radeon_fence **fence)
4023 {
4024 	struct radeon_semaphore *sem = NULL;
4025 	int ring_index = rdev->asic->copy.dma_ring_index;
4026 	struct radeon_ring *ring = &rdev->ring[ring_index];
4027 	u32 size_in_bytes, cur_size_in_bytes;
4028 	int i, num_loops;
4029 	int r = 0;
4030 
4031 	r = radeon_semaphore_create(rdev, &sem);
4032 	if (r) {
4033 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4034 		return r;
4035 	}
4036 
4037 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4038 	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
4039 	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
4040 	if (r) {
4041 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4042 		radeon_semaphore_free(rdev, &sem, NULL);
4043 		return r;
4044 	}
4045 
4046 	if (radeon_fence_need_sync(*fence, ring->idx)) {
4047 		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
4048 					    ring->idx);
4049 		radeon_fence_note_sync(*fence, ring->idx);
4050 	} else {
4051 		radeon_semaphore_free(rdev, &sem, NULL);
4052 	}
4053 
4054 	for (i = 0; i < num_loops; i++) {
4055 		cur_size_in_bytes = size_in_bytes;
4056 		if (cur_size_in_bytes > 0xFFFFF)
4057 			cur_size_in_bytes = 0xFFFFF;
4058 		size_in_bytes -= cur_size_in_bytes;
4059 		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
4060 		radeon_ring_write(ring, dst_offset & 0xffffffff);
4061 		radeon_ring_write(ring, src_offset & 0xffffffff);
4062 		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
4063 		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
4064 		src_offset += cur_size_in_bytes;
4065 		dst_offset += cur_size_in_bytes;
4066 	}
4067 
4068 	r = radeon_fence_emit(rdev, fence, ring->idx);
4069 	if (r) {
4070 		radeon_ring_unlock_undo(rdev, ring);
4071 		return r;
4072 	}
4073 
4074 	radeon_ring_unlock_commit(rdev, ring);
4075 	radeon_semaphore_free(rdev, &sem, *fence);
4076 
4077 	return r;
4078 }
4079 
4080 /*
4081  * startup/shutdown callbacks
4082  */
4083 static int si_startup(struct radeon_device *rdev)
4084 {
4085 	struct radeon_ring *ring;
4086 	int r;
4087 
4088 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4089 	    !rdev->rlc_fw || !rdev->mc_fw) {
4090 		r = si_init_microcode(rdev);
4091 		if (r) {
4092 			DRM_ERROR("Failed to load firmware!\n");
4093 			return r;
4094 		}
4095 	}
4096 
4097 	r = si_mc_load_microcode(rdev);
4098 	if (r) {
4099 		DRM_ERROR("Failed to load MC firmware!\n");
4100 		return r;
4101 	}
4102 
4103 	r = r600_vram_scratch_init(rdev);
4104 	if (r)
4105 		return r;
4106 
4107 	si_mc_program(rdev);
4108 	r = si_pcie_gart_enable(rdev);
4109 	if (r)
4110 		return r;
4111 	si_gpu_init(rdev);
4112 
4113 #if 0
4114 	r = evergreen_blit_init(rdev);
4115 	if (r) {
4116 		r600_blit_fini(rdev);
4117 		rdev->asic->copy = NULL;
4118 		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
4119 	}
4120 #endif
4121 	/* allocate rlc buffers */
4122 	r = si_rlc_init(rdev);
4123 	if (r) {
4124 		DRM_ERROR("Failed to init rlc BOs!\n");
4125 		return r;
4126 	}
4127 
4128 	/* allocate wb buffer */
4129 	r = radeon_wb_init(rdev);
4130 	if (r)
4131 		return r;
4132 
4133 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
4134 	if (r) {
4135 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4136 		return r;
4137 	}
4138 
4139 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
4140 	if (r) {
4141 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4142 		return r;
4143 	}
4144 
4145 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
4146 	if (r) {
4147 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4148 		return r;
4149 	}
4150 
4151 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
4152 	if (r) {
4153 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4154 		return r;
4155 	}
4156 
4157 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4158 	if (r) {
4159 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4160 		return r;
4161 	}
4162 
4163 	/* Enable IRQ */
4164 	r = si_irq_init(rdev);
4165 	if (r) {
4166 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
4167 		radeon_irq_kms_fini(rdev);
4168 		return r;
4169 	}
4170 	si_irq_set(rdev);
4171 
4172 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4173 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
4174 			     CP_RB0_RPTR, CP_RB0_WPTR,
4175 			     0, 0xfffff, RADEON_CP_PACKET2);
4176 	if (r)
4177 		return r;
4178 
4179 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
4180 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
4181 			     CP_RB1_RPTR, CP_RB1_WPTR,
4182 			     0, 0xfffff, RADEON_CP_PACKET2);
4183 	if (r)
4184 		return r;
4185 
4186 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
4187 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
4188 			     CP_RB2_RPTR, CP_RB2_WPTR,
4189 			     0, 0xfffff, RADEON_CP_PACKET2);
4190 	if (r)
4191 		return r;
4192 
4193 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4194 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
4195 			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
4196 			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
4197 			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
4198 	if (r)
4199 		return r;
4200 
4201 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4202 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
4203 			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
4204 			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
4205 			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
4206 	if (r)
4207 		return r;
4208 
4209 	r = si_cp_load_microcode(rdev);
4210 	if (r)
4211 		return r;
4212 	r = si_cp_resume(rdev);
4213 	if (r)
4214 		return r;
4215 
4216 	r = cayman_dma_resume(rdev);
4217 	if (r)
4218 		return r;
4219 
4220 	r = radeon_ib_pool_init(rdev);
4221 	if (r) {
4222 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
4223 		return r;
4224 	}
4225 
4226 	r = radeon_vm_manager_init(rdev);
4227 	if (r) {
4228 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
4229 		return r;
4230 	}
4231 
4232 	return 0;
4233 }
4234 
4235 int si_resume(struct radeon_device *rdev)
4236 {
4237 	int r;
4238 
4239 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
4240 	 * posting will perform necessary task to bring back GPU into good
4241 	 * shape.
4242 	 */
4243 	/* post card */
4244 	atom_asic_init(rdev->mode_info.atom_context);
4245 
4246 	rdev->accel_working = true;
4247 	r = si_startup(rdev);
4248 	if (r) {
4249 		DRM_ERROR("si startup failed on resume\n");
4250 		rdev->accel_working = false;
4251 		return r;
4252 	}
4253 
4254 	return r;
4255 
4256 }
4257 
4258 int si_suspend(struct radeon_device *rdev)
4259 {
4260 	si_cp_enable(rdev, false);
4261 	cayman_dma_stop(rdev);
4262 	si_irq_suspend(rdev);
4263 	radeon_wb_disable(rdev);
4264 	si_pcie_gart_disable(rdev);
4265 	return 0;
4266 }
4267 
4268 /* Plan is to move initialization in that function and use
4269  * helper function so that radeon_device_init pretty much
4270  * do nothing more than calling asic specific function. This
4271  * should also allow to remove a bunch of callback function
4272  * like vram_info.
4273  */
4274 int si_init(struct radeon_device *rdev)
4275 {
4276 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4277 	int r;
4278 
4279 	/* Read BIOS */
4280 	if (!radeon_get_bios(rdev)) {
4281 		if (ASIC_IS_AVIVO(rdev))
4282 			return -EINVAL;
4283 	}
4284 	/* Must be an ATOMBIOS */
4285 	if (!rdev->is_atom_bios) {
4286 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
4287 		return -EINVAL;
4288 	}
4289 	r = radeon_atombios_init(rdev);
4290 	if (r)
4291 		return r;
4292 
4293 	/* Post card if necessary */
4294 	if (!radeon_card_posted(rdev)) {
4295 		if (!rdev->bios) {
4296 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
4297 			return -EINVAL;
4298 		}
4299 		DRM_INFO("GPU not posted. posting now...\n");
4300 		atom_asic_init(rdev->mode_info.atom_context);
4301 	}
4302 	/* Initialize scratch registers */
4303 	si_scratch_init(rdev);
4304 	/* Initialize surface registers */
4305 	radeon_surface_init(rdev);
4306 	/* Initialize clocks */
4307 	radeon_get_clock_info(rdev->ddev);
4308 
4309 	/* Fence driver */
4310 	r = radeon_fence_driver_init(rdev);
4311 	if (r)
4312 		return r;
4313 
4314 	/* initialize memory controller */
4315 	r = si_mc_init(rdev);
4316 	if (r)
4317 		return r;
4318 	/* Memory manager */
4319 	r = radeon_bo_init(rdev);
4320 	if (r)
4321 		return r;
4322 
4323 	r = radeon_irq_kms_init(rdev);
4324 	if (r)
4325 		return r;
4326 
4327 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4328 	ring->ring_obj = NULL;
4329 	r600_ring_init(rdev, ring, 1024 * 1024);
4330 
4331 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
4332 	ring->ring_obj = NULL;
4333 	r600_ring_init(rdev, ring, 1024 * 1024);
4334 
4335 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
4336 	ring->ring_obj = NULL;
4337 	r600_ring_init(rdev, ring, 1024 * 1024);
4338 
4339 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4340 	ring->ring_obj = NULL;
4341 	r600_ring_init(rdev, ring, 64 * 1024);
4342 
4343 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4344 	ring->ring_obj = NULL;
4345 	r600_ring_init(rdev, ring, 64 * 1024);
4346 
4347 	rdev->ih.ring_obj = NULL;
4348 	r600_ih_ring_init(rdev, 64 * 1024);
4349 
4350 	r = r600_pcie_gart_init(rdev);
4351 	if (r)
4352 		return r;
4353 
4354 	rdev->accel_working = true;
4355 	r = si_startup(rdev);
4356 	if (r) {
4357 		dev_err(rdev->dev, "disabling GPU acceleration\n");
4358 		si_cp_fini(rdev);
4359 		cayman_dma_fini(rdev);
4360 		si_irq_fini(rdev);
4361 		si_rlc_fini(rdev);
4362 		radeon_wb_fini(rdev);
4363 		radeon_ib_pool_fini(rdev);
4364 		radeon_vm_manager_fini(rdev);
4365 		radeon_irq_kms_fini(rdev);
4366 		si_pcie_gart_fini(rdev);
4367 		rdev->accel_working = false;
4368 	}
4369 
4370 	/* Don't start up if the MC ucode is missing.
4371 	 * The default clocks and voltages before the MC ucode
4372 	 * is loaded are not suffient for advanced operations.
4373 	 */
4374 	if (!rdev->mc_fw) {
4375 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
4376 		return -EINVAL;
4377 	}
4378 
4379 	return 0;
4380 }
4381 
4382 void si_fini(struct radeon_device *rdev)
4383 {
4384 #if 0
4385 	r600_blit_fini(rdev);
4386 #endif
4387 	si_cp_fini(rdev);
4388 	cayman_dma_fini(rdev);
4389 	si_irq_fini(rdev);
4390 	si_rlc_fini(rdev);
4391 	radeon_wb_fini(rdev);
4392 	radeon_vm_manager_fini(rdev);
4393 	radeon_ib_pool_fini(rdev);
4394 	radeon_irq_kms_fini(rdev);
4395 	si_pcie_gart_fini(rdev);
4396 	r600_vram_scratch_fini(rdev);
4397 	radeon_gem_fini(rdev);
4398 	radeon_fence_driver_fini(rdev);
4399 	radeon_bo_fini(rdev);
4400 	radeon_atombios_fini(rdev);
4401 	si_fini_microcode(rdev);
4402 	drm_free(rdev->bios, DRM_MEM_DRIVER);
4403 	rdev->bios = NULL;
4404 }
4405 
4406 /**
4407  * si_get_gpu_clock - return GPU clock counter snapshot
4408  *
4409  * @rdev: radeon_device pointer
4410  *
4411  * Fetches a GPU clock counter snapshot (SI).
4412  * Returns the 64 bit clock counter snapshot.
4413  */
4414 uint64_t si_get_gpu_clock(struct radeon_device *rdev)
4415 {
4416 	uint64_t clock;
4417 
4418 	spin_lock(&rdev->gpu_clock_mutex);
4419 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4420 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
4421 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4422 	spin_unlock(&rdev->gpu_clock_mutex);
4423 	return clock;
4424 }
4425