1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors: Marek Olšák <maraeo@gmail.com>
24  *
25  */
26 
27 #include "r600_cs.h"
28 #include "evergreend.h"
29 
30 /* 2xMSAA
31  * There are two locations (4, 4), (-4, -4). */
32 const uint32_t eg_sample_locs_2x[4] = {
33 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
34 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
35 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
36 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
37 };
38 const unsigned eg_max_dist_2x = 4;
/* 4xMSAA
 * There are 4 locations: (-2, -6), (6, -2), (-6, 2), (2, 6). */
41 const uint32_t eg_sample_locs_4x[4] = {
42 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
43 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
44 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
45 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
46 };
47 const unsigned eg_max_dist_4x = 6;
48 
49 /* Cayman 8xMSAA */
50 static const uint32_t cm_sample_locs_8x[] = {
51 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
52 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
53 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
54 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
55 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
56 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
57 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
58 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
59 };
60 static const unsigned cm_max_dist_8x = 8;
61 /* Cayman 16xMSAA */
62 static const uint32_t cm_sample_locs_16x[] = {
63 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
64 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
65 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
66 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
67 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
68 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
69 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
70 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
71 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
72 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
73 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
74 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
75 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
76 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
77 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
78 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
79 };
80 static const unsigned cm_max_dist_16x = 8;
81 
cayman_get_sample_position(struct pipe_context * ctx,unsigned sample_count,unsigned sample_index,float * out_value)82 void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
83 				unsigned sample_index, float *out_value)
84 {
85 	int offset, index;
86 	struct {
87 		int idx:4;
88 	} val;
89 	switch (sample_count) {
90 	case 1:
91 	default:
92 		out_value[0] = out_value[1] = 0.5;
93 		break;
94 	case 2:
95 		offset = 4 * (sample_index * 2);
96 		val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf;
97 		out_value[0] = (float)(val.idx + 8) / 16.0f;
98 		val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf;
99 		out_value[1] = (float)(val.idx + 8) / 16.0f;
100 		break;
101 	case 4:
102 		offset = 4 * (sample_index * 2);
103 		val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf;
104 		out_value[0] = (float)(val.idx + 8) / 16.0f;
105 		val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf;
106 		out_value[1] = (float)(val.idx + 8) / 16.0f;
107 		break;
108 	case 8:
109 		offset = 4 * (sample_index % 4 * 2);
110 		index = (sample_index / 4) * 4;
111 		val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
112 		out_value[0] = (float)(val.idx + 8) / 16.0f;
113 		val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
114 		out_value[1] = (float)(val.idx + 8) / 16.0f;
115 		break;
116 	case 16:
117 		offset = 4 * (sample_index % 4 * 2);
118 		index = (sample_index / 4) * 4;
119 		val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
120 		out_value[0] = (float)(val.idx + 8) / 16.0f;
121 		val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
122 		out_value[1] = (float)(val.idx + 8) / 16.0f;
123 		break;
124 	}
125 }
126 
cayman_init_msaa(struct pipe_context * ctx)127 void cayman_init_msaa(struct pipe_context *ctx)
128 {
129 	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
130 	int i;
131 
132 	cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
133 
134 	for (i = 0; i < 2; i++)
135 		cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
136 	for (i = 0; i < 4; i++)
137 		cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
138 	for (i = 0; i < 8; i++)
139 		cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
140 	for (i = 0; i < 16; i++)
141 		cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
142 }
143 
cayman_emit_msaa_sample_locs(struct radeon_cmdbuf * cs,int nr_samples)144 static void cayman_emit_msaa_sample_locs(struct radeon_cmdbuf *cs, int nr_samples)
145 {
146 	switch (nr_samples) {
147 	default:
148 	case 1:
149 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
150 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
151 		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
152 		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
153 		break;
154 	case 2:
155 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
156 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
157 		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
158 		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
159 		break;
160 	case 4:
161 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
162 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
163 		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
164 		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
165 		break;
166 	case 8:
167 		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
168 		radeon_emit(cs, cm_sample_locs_8x[0]);
169 		radeon_emit(cs, cm_sample_locs_8x[4]);
170 		radeon_emit(cs, 0);
171 		radeon_emit(cs, 0);
172 		radeon_emit(cs, cm_sample_locs_8x[1]);
173 		radeon_emit(cs, cm_sample_locs_8x[5]);
174 		radeon_emit(cs, 0);
175 		radeon_emit(cs, 0);
176 		radeon_emit(cs, cm_sample_locs_8x[2]);
177 		radeon_emit(cs, cm_sample_locs_8x[6]);
178 		radeon_emit(cs, 0);
179 		radeon_emit(cs, 0);
180 		radeon_emit(cs, cm_sample_locs_8x[3]);
181 		radeon_emit(cs, cm_sample_locs_8x[7]);
182 		break;
183 	case 16:
184 		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
185 		radeon_emit(cs, cm_sample_locs_16x[0]);
186 		radeon_emit(cs, cm_sample_locs_16x[4]);
187 		radeon_emit(cs, cm_sample_locs_16x[8]);
188 		radeon_emit(cs, cm_sample_locs_16x[12]);
189 		radeon_emit(cs, cm_sample_locs_16x[1]);
190 		radeon_emit(cs, cm_sample_locs_16x[5]);
191 		radeon_emit(cs, cm_sample_locs_16x[9]);
192 		radeon_emit(cs, cm_sample_locs_16x[13]);
193 		radeon_emit(cs, cm_sample_locs_16x[2]);
194 		radeon_emit(cs, cm_sample_locs_16x[6]);
195 		radeon_emit(cs, cm_sample_locs_16x[10]);
196 		radeon_emit(cs, cm_sample_locs_16x[14]);
197 		radeon_emit(cs, cm_sample_locs_16x[3]);
198 		radeon_emit(cs, cm_sample_locs_16x[7]);
199 		radeon_emit(cs, cm_sample_locs_16x[11]);
200 		radeon_emit(cs, cm_sample_locs_16x[15]);
201 		break;
202 	}
203 }
204 
cayman_emit_msaa_state(struct radeon_cmdbuf * cs,int nr_samples,int ps_iter_samples,int overrast_samples)205 void cayman_emit_msaa_state(struct radeon_cmdbuf *cs, int nr_samples,
206 			    int ps_iter_samples, int overrast_samples)
207 {
208 	int setup_samples = nr_samples > 1 ? nr_samples :
209 			    overrast_samples > 1 ? overrast_samples : 0;
210 	/* Required by OpenGL line rasterization.
211 	 *
212 	 * TODO: We should also enable perpendicular endcaps for AA lines,
213 	 *       but that requires implementing line stippling in the pixel
214 	 *       shader. SC can only do line stippling with axis-aligned
215 	 *       endcaps.
216 	 */
217 	unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
218 	unsigned sc_mode_cntl_1 =
219 		EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
220 		EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1);
221 
222 	if (nr_samples > 1) {
223 		cayman_emit_msaa_sample_locs(cs, nr_samples);
224 	}
225 
226 	if (setup_samples > 1) {
227 		/* indexed by log2(nr_samples) */
228 		const unsigned max_dist[] = {
229 			0,
230 			eg_max_dist_2x,
231 			eg_max_dist_4x,
232 			cm_max_dist_8x,
233 			cm_max_dist_16x
234 		};
235 		unsigned log_samples = util_logbase2(setup_samples);
236 		unsigned log_ps_iter_samples =
237 			util_logbase2(util_next_power_of_two(ps_iter_samples));
238 
239 		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
240 		radeon_emit(cs, sc_line_cntl |
241 			    S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
242 		radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
243 			    S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
244 			    S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
245 
246 		if (nr_samples > 1) {
247 			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
248 					       S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
249 					       S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
250 					       S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
251 					       S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
252 					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
253 					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
254 			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
255 					       EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
256 					       sc_mode_cntl_1);
257 		} else if (overrast_samples > 1) {
258 			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
259 					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
260 					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
261 					       S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
262 			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
263 					       sc_mode_cntl_1);
264 		}
265 	} else {
266 		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
267 		radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */
268 		radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
269 
270 		radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
271 				       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
272 				       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
273 		radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
274 				       sc_mode_cntl_1);
275 	}
276 }
277