1 /*
2    Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3    Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4    develop this 3D driver.
5 
6    Permission is hereby granted, free of charge, to any person obtaining
7    a copy of this software and associated documentation files (the
8    "Software"), to deal in the Software without restriction, including
9    without limitation the rights to use, copy, modify, merge, publish,
10    distribute, sublicense, and/or sell copies of the Software, and to
11    permit persons to whom the Software is furnished to do so, subject to
12    the following conditions:
13 
14    The above copyright notice and this permission notice (including the
15    next paragraph) shall be included in all copies or substantial
16    portions of the Software.
17 
18    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21    IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23    OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31 
32 #include "brw_eu.h"
33 
34 #include <string.h>
35 #include <stdlib.h>
36 
37 #define ARRAY_SIZE(A) (sizeof(A)/sizeof(A[0]))
38 
39 /***********************************************************************
40  * Internal helper for constructing instructions
41  */
42 
guess_execution_size(struct brw_compile * p,struct brw_instruction * insn,struct brw_reg reg)43 static void guess_execution_size(struct brw_compile *p,
44 				 struct brw_instruction *insn,
45 				 struct brw_reg reg)
46 {
47 	if (reg.width == BRW_WIDTH_8 && p->compressed)
48 		insn->header.execution_size = BRW_EXECUTE_16;
49 	else
50 		insn->header.execution_size = reg.width;
51 }
52 
53 
54 /**
55  * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56  * registers, implicitly moving the operand to a message register.
57  *
58  * On Sandybridge, this is no longer the case.  This function performs the
59  * explicit move; it should be called before emitting a SEND instruction.
60  */
61 void
gen6_resolve_implied_move(struct brw_compile * p,struct brw_reg * src,unsigned msg_reg_nr)62 gen6_resolve_implied_move(struct brw_compile *p,
63 			  struct brw_reg *src,
64 			  unsigned msg_reg_nr)
65 {
66 	if (p->gen < 060)
67 		return;
68 
69 	if (src->file == BRW_MESSAGE_REGISTER_FILE)
70 		return;
71 
72 	if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
73 		brw_push_insn_state(p);
74 		brw_set_mask_control(p, BRW_MASK_DISABLE);
75 		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
76 		brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src));
77 		brw_pop_insn_state(p);
78 	}
79 	*src = brw_message_reg(msg_reg_nr);
80 }
81 
82 static void
gen7_convert_mrf_to_grf(struct brw_compile * p,struct brw_reg * reg)83 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
84 {
85 	/* From the BSpec / ISA Reference / send - [DevIVB+]:
86 	 * "The send with EOT should use register space R112-R127 for <src>. This is
87 	 *  to enable loading of a new thread into the same slot while the message
88 	 *  with EOT for current thread is pending dispatch."
89 	 *
90 	 * Since we're pretending to have 16 MRFs anyway, we may as well use the
91 	 * registers required for messages with EOT.
92 	 */
93 	if (p->gen >= 070 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
94 		reg->file = BRW_GENERAL_REGISTER_FILE;
95 		reg->nr += 111;
96 	}
97 }
98 
99 void
brw_set_dest(struct brw_compile * p,struct brw_instruction * insn,struct brw_reg dest)100 brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
101 	     struct brw_reg dest)
102 {
103 	if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
104 	    dest.file != BRW_MESSAGE_REGISTER_FILE)
105 		assert(dest.nr < 128);
106 
107 	gen7_convert_mrf_to_grf(p, &dest);
108 
109 	insn->bits1.da1.dest_reg_file = dest.file;
110 	insn->bits1.da1.dest_reg_type = dest.type;
111 	insn->bits1.da1.dest_address_mode = dest.address_mode;
112 
113 	if (dest.address_mode == BRW_ADDRESS_DIRECT) {
114 		insn->bits1.da1.dest_reg_nr = dest.nr;
115 
116 		if (insn->header.access_mode == BRW_ALIGN_1) {
117 			insn->bits1.da1.dest_subreg_nr = dest.subnr;
118 			if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
119 				dest.hstride = BRW_HORIZONTAL_STRIDE_1;
120 			insn->bits1.da1.dest_horiz_stride = dest.hstride;
121 		} else {
122 			insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
123 			insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
124 			/* even ignored in da16, still need to set as '01' */
125 			insn->bits1.da16.dest_horiz_stride = 1;
126 		}
127 	} else {
128 		insn->bits1.ia1.dest_subreg_nr = dest.subnr;
129 
130 		/* These are different sizes in align1 vs align16:
131 		*/
132 		if (insn->header.access_mode == BRW_ALIGN_1) {
133 			insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
134 			if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
135 				dest.hstride = BRW_HORIZONTAL_STRIDE_1;
136 			insn->bits1.ia1.dest_horiz_stride = dest.hstride;
137 		}
138 		else {
139 			insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
140 			/* even ignored in da16, still need to set as '01' */
141 			insn->bits1.ia16.dest_horiz_stride = 1;
142 		}
143 	}
144 
145 	guess_execution_size(p, insn, dest);
146 }
147 
/*
 * Element size looked up by the register-type field of an instruction
 * (presumably in bytes -- validate_reg() compares stride * size against 2
 * for a word-sized stride).  Type 6 has no size assigned and reads as 0.
 */
static const int reg_type_size[8] = {
	4, 4,	/* types 0, 1 */
	2, 2,	/* types 2, 3 */
	1, 1,	/* types 4, 5 */
	0,	/* type 6: unspecified */
	4,	/* type 7 */
};
157 
158 static void
validate_reg(struct brw_instruction * insn,struct brw_reg reg)159 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
160 {
161 	int hstride_for_reg[] = {0, 1, 2, 4};
162 	int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
163 	int width_for_reg[] = {1, 2, 4, 8, 16};
164 	int execsize_for_reg[] = {1, 2, 4, 8, 16};
165 	int width, hstride, vstride, execsize;
166 
167 	if (reg.file == BRW_IMMEDIATE_VALUE) {
168 		/* 3.3.6: Region Parameters.  Restriction: Immediate vectors
169 		 * mean the destination has to be 128-bit aligned and the
170 		 * destination horiz stride has to be a word.
171 		 */
172 		if (reg.type == BRW_REGISTER_TYPE_V) {
173 			assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
174 			       reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
175 		}
176 
177 		return;
178 	}
179 
180 	if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
181 	    reg.nr == BRW_ARF_NULL)
182 		return;
183 
184 	assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
185 	assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
186 	assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
187 	assert(insn->header.execution_size >= 0 && insn->header.execution_size < ARRAY_SIZE(execsize_for_reg));
188 
189 	hstride = hstride_for_reg[reg.hstride];
190 
191 	if (reg.vstride == 0xf) {
192 		vstride = -1;
193 	} else {
194 		vstride = vstride_for_reg[reg.vstride];
195 	}
196 
197 	width = width_for_reg[reg.width];
198 
199 	execsize = execsize_for_reg[insn->header.execution_size];
200 
201 	/* Restrictions from 3.3.10: Register Region Restrictions. */
202 	/* 3. */
203 	assert(execsize >= width);
204 
205 	/* 4. */
206 	if (execsize == width && hstride != 0) {
207 		assert(vstride == -1 || vstride == width * hstride);
208 	}
209 
210 	/* 5. */
211 	if (execsize == width && hstride == 0) {
212 		/* no restriction on vstride. */
213 	}
214 
215 	/* 6. */
216 	if (width == 1) {
217 		assert(hstride == 0);
218 	}
219 
220 	/* 7. */
221 	if (execsize == 1 && width == 1) {
222 		assert(hstride == 0);
223 		assert(vstride == 0);
224 	}
225 
226 	/* 8. */
227 	if (vstride == 0 && hstride == 0) {
228 		assert(width == 1);
229 	}
230 
231 	/* 10. Check destination issues. */
232 }
233 
234 void
brw_set_src0(struct brw_compile * p,struct brw_instruction * insn,struct brw_reg reg)235 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
236 	     struct brw_reg reg)
237 {
238 	if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
239 		assert(reg.nr < 128);
240 
241 	gen7_convert_mrf_to_grf(p, &reg);
242 
243 	validate_reg(insn, reg);
244 
245 	insn->bits1.da1.src0_reg_file = reg.file;
246 	insn->bits1.da1.src0_reg_type = reg.type;
247 	insn->bits2.da1.src0_abs = reg.abs;
248 	insn->bits2.da1.src0_negate = reg.negate;
249 	insn->bits2.da1.src0_address_mode = reg.address_mode;
250 
251 	if (reg.file == BRW_IMMEDIATE_VALUE) {
252 		insn->bits3.ud = reg.dw1.ud;
253 
254 		/* Required to set some fields in src1 as well:
255 		*/
256 		insn->bits1.da1.src1_reg_file = 0; /* arf */
257 		insn->bits1.da1.src1_reg_type = reg.type;
258 	} else {
259 		if (reg.address_mode == BRW_ADDRESS_DIRECT) {
260 			if (insn->header.access_mode == BRW_ALIGN_1) {
261 				insn->bits2.da1.src0_subreg_nr = reg.subnr;
262 				insn->bits2.da1.src0_reg_nr = reg.nr;
263 			} else {
264 				insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
265 				insn->bits2.da16.src0_reg_nr = reg.nr;
266 			}
267 		} else {
268 			insn->bits2.ia1.src0_subreg_nr = reg.subnr;
269 
270 			if (insn->header.access_mode == BRW_ALIGN_1) {
271 				insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
272 			} else {
273 				insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
274 			}
275 		}
276 
277 		if (insn->header.access_mode == BRW_ALIGN_1) {
278 			if (reg.width == BRW_WIDTH_1 &&
279 			    insn->header.execution_size == BRW_EXECUTE_1) {
280 				insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
281 				insn->bits2.da1.src0_width = BRW_WIDTH_1;
282 				insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
283 			} else {
284 				insn->bits2.da1.src0_horiz_stride = reg.hstride;
285 				insn->bits2.da1.src0_width = reg.width;
286 				insn->bits2.da1.src0_vert_stride = reg.vstride;
287 			}
288 		} else {
289 			insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
290 			insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
291 			insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
292 			insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
293 
294 			/* This is an oddity of the fact we're using the same
295 			 * descriptions for registers in align_16 as align_1:
296 			 */
297 			if (reg.vstride == BRW_VERTICAL_STRIDE_8)
298 				insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
299 			else
300 				insn->bits2.da16.src0_vert_stride = reg.vstride;
301 		}
302 	}
303 }
304 
brw_set_src1(struct brw_compile * p,struct brw_instruction * insn,struct brw_reg reg)305 void brw_set_src1(struct brw_compile *p,
306 		  struct brw_instruction *insn,
307 		  struct brw_reg reg)
308 {
309 	assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
310 	assert(reg.nr < 128);
311 
312 	gen7_convert_mrf_to_grf(p, &reg);
313 
314 	validate_reg(insn, reg);
315 
316 	insn->bits1.da1.src1_reg_file = reg.file;
317 	insn->bits1.da1.src1_reg_type = reg.type;
318 	insn->bits3.da1.src1_abs = reg.abs;
319 	insn->bits3.da1.src1_negate = reg.negate;
320 
321 	/* Only src1 can be immediate in two-argument instructions. */
322 	assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
323 
324 	if (reg.file == BRW_IMMEDIATE_VALUE) {
325 		insn->bits3.ud = reg.dw1.ud;
326 	} else {
327 		/* This is a hardware restriction, which may or may not be lifted
328 		 * in the future:
329 		 */
330 		assert (reg.address_mode == BRW_ADDRESS_DIRECT);
331 		/* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
332 
333 		if (insn->header.access_mode == BRW_ALIGN_1) {
334 			insn->bits3.da1.src1_subreg_nr = reg.subnr;
335 			insn->bits3.da1.src1_reg_nr = reg.nr;
336 		} else {
337 			insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
338 			insn->bits3.da16.src1_reg_nr = reg.nr;
339 		}
340 
341 		if (insn->header.access_mode == BRW_ALIGN_1) {
342 			if (reg.width == BRW_WIDTH_1 &&
343 			    insn->header.execution_size == BRW_EXECUTE_1) {
344 				insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
345 				insn->bits3.da1.src1_width = BRW_WIDTH_1;
346 				insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
347 			} else {
348 				insn->bits3.da1.src1_horiz_stride = reg.hstride;
349 				insn->bits3.da1.src1_width = reg.width;
350 				insn->bits3.da1.src1_vert_stride = reg.vstride;
351 			}
352 		} else {
353 			insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
354 			insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
355 			insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
356 			insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
357 
358 			/* This is an oddity of the fact we're using the same
359 			 * descriptions for registers in align_16 as align_1:
360 			 */
361 			if (reg.vstride == BRW_VERTICAL_STRIDE_8)
362 				insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
363 			else
364 				insn->bits3.da16.src1_vert_stride = reg.vstride;
365 		}
366 	}
367 }
368 
369 /**
370  * Set the Message Descriptor and Extended Message Descriptor fields
371  * for SEND messages.
372  *
373  * \note This zeroes out the Function Control bits, so it must be called
374  *       \b before filling out any message-specific data.  Callers can
375  *       choose not to fill in irrelevant bits; they will be zero.
376  */
377 static void
brw_set_message_descriptor(struct brw_compile * p,struct brw_instruction * inst,enum brw_message_target sfid,unsigned msg_length,unsigned response_length,bool header_present,bool end_of_thread)378 brw_set_message_descriptor(struct brw_compile *p,
379 			   struct brw_instruction *inst,
380 			   enum brw_message_target sfid,
381 			   unsigned msg_length,
382 			   unsigned response_length,
383 			   bool header_present,
384 			   bool end_of_thread)
385 {
386 	brw_set_src1(p, inst, brw_imm_d(0));
387 
388 	if (p->gen >= 050) {
389 		inst->bits3.generic_gen5.header_present = header_present;
390 		inst->bits3.generic_gen5.response_length = response_length;
391 		inst->bits3.generic_gen5.msg_length = msg_length;
392 		inst->bits3.generic_gen5.end_of_thread = end_of_thread;
393 
394 		if (p->gen >= 060) {
395 			/* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
396 			inst->header.destreg__conditionalmod = sfid;
397 		} else {
398 			/* Set Extended Message Descriptor (ex_desc) */
399 			inst->bits2.send_gen5.sfid = sfid;
400 			inst->bits2.send_gen5.end_of_thread = end_of_thread;
401 		}
402 	} else {
403 		inst->bits3.generic.response_length = response_length;
404 		inst->bits3.generic.msg_length = msg_length;
405 		inst->bits3.generic.msg_target = sfid;
406 		inst->bits3.generic.end_of_thread = end_of_thread;
407 	}
408 }
409 
410 
brw_set_math_message(struct brw_compile * p,struct brw_instruction * insn,unsigned function,unsigned integer_type,bool low_precision,bool saturate,unsigned dataType)411 static void brw_set_math_message(struct brw_compile *p,
412 				 struct brw_instruction *insn,
413 				 unsigned function,
414 				 unsigned integer_type,
415 				 bool low_precision,
416 				 bool saturate,
417 				 unsigned dataType)
418 {
419 	unsigned msg_length;
420 	unsigned response_length;
421 
422 	/* Infer message length from the function */
423 	switch (function) {
424 	case BRW_MATH_FUNCTION_POW:
425 	case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
426 	case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
427 	case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
428 		msg_length = 2;
429 		break;
430 	default:
431 		msg_length = 1;
432 		break;
433 	}
434 
435 	/* Infer response length from the function */
436 	switch (function) {
437 	case BRW_MATH_FUNCTION_SINCOS:
438 	case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
439 		response_length = 2;
440 		break;
441 	default:
442 		response_length = 1;
443 		break;
444 	}
445 
446 	brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
447 				   msg_length, response_length,
448 				   false, false);
449 	if (p->gen == 050) {
450 		insn->bits3.math_gen5.function = function;
451 		insn->bits3.math_gen5.int_type = integer_type;
452 		insn->bits3.math_gen5.precision = low_precision;
453 		insn->bits3.math_gen5.saturate = saturate;
454 		insn->bits3.math_gen5.data_type = dataType;
455 		insn->bits3.math_gen5.snapshot = 0;
456 	} else {
457 		insn->bits3.math.function = function;
458 		insn->bits3.math.int_type = integer_type;
459 		insn->bits3.math.precision = low_precision;
460 		insn->bits3.math.saturate = saturate;
461 		insn->bits3.math.data_type = dataType;
462 	}
463 }
464 
brw_set_ff_sync_message(struct brw_compile * p,struct brw_instruction * insn,bool allocate,unsigned response_length,bool end_of_thread)465 static void brw_set_ff_sync_message(struct brw_compile *p,
466 				    struct brw_instruction *insn,
467 				    bool allocate,
468 				    unsigned response_length,
469 				    bool end_of_thread)
470 {
471 	brw_set_message_descriptor(p, insn, BRW_SFID_URB,
472 				   1, response_length,
473 				   true, end_of_thread);
474 	insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
475 	insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
476 	insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
477 	insn->bits3.urb_gen5.allocate = allocate;
478 	insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
479 	insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
480 }
481 
brw_set_urb_message(struct brw_compile * p,struct brw_instruction * insn,bool allocate,bool used,unsigned msg_length,unsigned response_length,bool end_of_thread,bool complete,unsigned offset,unsigned swizzle_control)482 static void brw_set_urb_message(struct brw_compile *p,
483 				struct brw_instruction *insn,
484 				bool allocate,
485 				bool used,
486 				unsigned msg_length,
487 				unsigned response_length,
488 				bool end_of_thread,
489 				bool complete,
490 				unsigned offset,
491 				unsigned swizzle_control)
492 {
493 	brw_set_message_descriptor(p, insn, BRW_SFID_URB,
494 				   msg_length, response_length, true, end_of_thread);
495 	if (p->gen >= 070) {
496 		insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
497 		insn->bits3.urb_gen7.offset = offset;
498 		assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
499 		insn->bits3.urb_gen7.swizzle_control = swizzle_control;
500 		/* per_slot_offset = 0 makes it ignore offsets in message header */
501 		insn->bits3.urb_gen7.per_slot_offset = 0;
502 		insn->bits3.urb_gen7.complete = complete;
503 	} else if (p->gen >= 050) {
504 		insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
505 		insn->bits3.urb_gen5.offset = offset;
506 		insn->bits3.urb_gen5.swizzle_control = swizzle_control;
507 		insn->bits3.urb_gen5.allocate = allocate;
508 		insn->bits3.urb_gen5.used = used;	/* ? */
509 		insn->bits3.urb_gen5.complete = complete;
510 	} else {
511 		insn->bits3.urb.opcode = 0;	/* ? */
512 		insn->bits3.urb.offset = offset;
513 		insn->bits3.urb.swizzle_control = swizzle_control;
514 		insn->bits3.urb.allocate = allocate;
515 		insn->bits3.urb.used = used;	/* ? */
516 		insn->bits3.urb.complete = complete;
517 	}
518 }
519 
520 void
brw_set_dp_write_message(struct brw_compile * p,struct brw_instruction * insn,unsigned binding_table_index,unsigned msg_control,unsigned msg_type,unsigned msg_length,bool header_present,bool last_render_target,unsigned response_length,bool end_of_thread,bool send_commit_msg)521 brw_set_dp_write_message(struct brw_compile *p,
522 			 struct brw_instruction *insn,
523 			 unsigned binding_table_index,
524 			 unsigned msg_control,
525 			 unsigned msg_type,
526 			 unsigned msg_length,
527 			 bool header_present,
528 			 bool last_render_target,
529 			 unsigned response_length,
530 			 bool end_of_thread,
531 			 bool send_commit_msg)
532 {
533 	unsigned sfid;
534 
535 	if (p->gen >= 070) {
536 		/* Use the Render Cache for RT writes; otherwise use the Data Cache */
537 		if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
538 			sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
539 		else
540 			sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
541 	} else if (p->gen >= 060) {
542 		/* Use the render cache for all write messages. */
543 		sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
544 	} else {
545 		sfid = BRW_SFID_DATAPORT_WRITE;
546 	}
547 
548 	brw_set_message_descriptor(p, insn, sfid,
549 				   msg_length, response_length,
550 				   header_present, end_of_thread);
551 
552 	if (p->gen >= 070) {
553 		insn->bits3.gen7_dp.binding_table_index = binding_table_index;
554 		insn->bits3.gen7_dp.msg_control = msg_control;
555 		insn->bits3.gen7_dp.last_render_target = last_render_target;
556 		insn->bits3.gen7_dp.msg_type = msg_type;
557 	} else if (p->gen >= 060) {
558 		insn->bits3.gen6_dp.binding_table_index = binding_table_index;
559 		insn->bits3.gen6_dp.msg_control = msg_control;
560 		insn->bits3.gen6_dp.last_render_target = last_render_target;
561 		insn->bits3.gen6_dp.msg_type = msg_type;
562 		insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
563 	} else if (p->gen >= 050) {
564 		insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
565 		insn->bits3.dp_write_gen5.msg_control = msg_control;
566 		insn->bits3.dp_write_gen5.last_render_target = last_render_target;
567 		insn->bits3.dp_write_gen5.msg_type = msg_type;
568 		insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
569 	} else {
570 		insn->bits3.dp_write.binding_table_index = binding_table_index;
571 		insn->bits3.dp_write.msg_control = msg_control;
572 		insn->bits3.dp_write.last_render_target = last_render_target;
573 		insn->bits3.dp_write.msg_type = msg_type;
574 		insn->bits3.dp_write.send_commit_msg = send_commit_msg;
575 	}
576 }
577 
578 void
brw_set_dp_read_message(struct brw_compile * p,struct brw_instruction * insn,unsigned binding_table_index,unsigned msg_control,unsigned msg_type,unsigned target_cache,unsigned msg_length,unsigned response_length)579 brw_set_dp_read_message(struct brw_compile *p,
580 			struct brw_instruction *insn,
581 			unsigned binding_table_index,
582 			unsigned msg_control,
583 			unsigned msg_type,
584 			unsigned target_cache,
585 			unsigned msg_length,
586 			unsigned response_length)
587 {
588 	unsigned sfid;
589 
590 	if (p->gen >= 070) {
591 		sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
592 	} else if (p->gen >= 060) {
593 		if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
594 			sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
595 		else
596 			sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
597 	} else {
598 		sfid = BRW_SFID_DATAPORT_READ;
599 	}
600 
601 	brw_set_message_descriptor(p, insn, sfid,
602 				   msg_length, response_length,
603 				   true, false);
604 
605 	if (p->gen >= 070) {
606 		insn->bits3.gen7_dp.binding_table_index = binding_table_index;
607 		insn->bits3.gen7_dp.msg_control = msg_control;
608 		insn->bits3.gen7_dp.last_render_target = 0;
609 		insn->bits3.gen7_dp.msg_type = msg_type;
610 	} else if (p->gen >= 060) {
611 		insn->bits3.gen6_dp.binding_table_index = binding_table_index;
612 		insn->bits3.gen6_dp.msg_control = msg_control;
613 		insn->bits3.gen6_dp.last_render_target = 0;
614 		insn->bits3.gen6_dp.msg_type = msg_type;
615 		insn->bits3.gen6_dp.send_commit_msg = 0;
616 	} else if (p->gen >= 050) {
617 		insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
618 		insn->bits3.dp_read_gen5.msg_control = msg_control;
619 		insn->bits3.dp_read_gen5.msg_type = msg_type;
620 		insn->bits3.dp_read_gen5.target_cache = target_cache;
621 	} else if (p->gen >= 045) {
622 		insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
623 		insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
624 		insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
625 		insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
626 	} else {
627 		insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
628 		insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
629 		insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
630 		insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
631 	}
632 }
633 
brw_set_sampler_message(struct brw_compile * p,struct brw_instruction * insn,unsigned binding_table_index,unsigned sampler,unsigned msg_type,unsigned response_length,unsigned msg_length,bool header_present,unsigned simd_mode)634 static void brw_set_sampler_message(struct brw_compile *p,
635                                     struct brw_instruction *insn,
636                                     unsigned binding_table_index,
637                                     unsigned sampler,
638                                     unsigned msg_type,
639                                     unsigned response_length,
640                                     unsigned msg_length,
641                                     bool header_present,
642                                     unsigned simd_mode)
643 {
644 	brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER,
645 				   msg_length, response_length,
646 				   header_present, false);
647 
648 	if (p->gen >= 070) {
649 		insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
650 		insn->bits3.sampler_gen7.sampler = sampler;
651 		insn->bits3.sampler_gen7.msg_type = msg_type;
652 		insn->bits3.sampler_gen7.simd_mode = simd_mode;
653 	} else if (p->gen >= 050) {
654 		insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
655 		insn->bits3.sampler_gen5.sampler = sampler;
656 		insn->bits3.sampler_gen5.msg_type = msg_type;
657 		insn->bits3.sampler_gen5.simd_mode = simd_mode;
658 	} else if (p->gen >= 045) {
659 		insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
660 		insn->bits3.sampler_g4x.sampler = sampler;
661 		insn->bits3.sampler_g4x.msg_type = msg_type;
662 	} else {
663 		insn->bits3.sampler.binding_table_index = binding_table_index;
664 		insn->bits3.sampler.sampler = sampler;
665 		insn->bits3.sampler.msg_type = msg_type;
666 		insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
667 	}
668 }
669 
670 
brw_NOP(struct brw_compile * p)671 void brw_NOP(struct brw_compile *p)
672 {
673 	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP);
674 	brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
675 	brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
676 	brw_set_src1(p, insn, brw_imm_ud(0x0));
677 }
678 
679 /***********************************************************************
680  * Comparisons, if/else/endif
681  */
682 
683 static void
push_if_stack(struct brw_compile * p,struct brw_instruction * inst)684 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
685 {
686 	p->if_stack[p->if_stack_depth] = inst;
687 
688 	p->if_stack_depth++;
689 	if (p->if_stack_array_size <= p->if_stack_depth) {
690 		p->if_stack_array_size *= 2;
691 		p->if_stack = realloc(p->if_stack, sizeof(struct brw_instruction *)*p->if_stack_array_size);
692 	}
693 }
694 
695 /* EU takes the value from the flag register and pushes it onto some
696  * sort of a stack (presumably merging with any flag value already on
697  * the stack).  Within an if block, the flags at the top of the stack
698  * control execution on each channel of the unit, eg. on each of the
699  * 16 pixel values in our wm programs.
700  *
701  * When the matching 'else' instruction is reached (presumably by
702  * countdown of the instruction count patched in by our ELSE/ENDIF
703  * functions), the relevant flags are inverted.
704  *
705  * When the matching 'endif' instruction is reached, the flags are
706  * popped off.  If the stack is now empty, normal execution resumes.
707  */
708 struct brw_instruction *
brw_IF(struct brw_compile * p,unsigned execute_size)709 brw_IF(struct brw_compile *p, unsigned execute_size)
710 {
711 	struct brw_instruction *insn;
712 
713 	insn = brw_next_insn(p, BRW_OPCODE_IF);
714 
715 	/* Override the defaults for this instruction: */
716 	if (p->gen < 060) {
717 		brw_set_dest(p, insn, brw_ip_reg());
718 		brw_set_src0(p, insn, brw_ip_reg());
719 		brw_set_src1(p, insn, brw_imm_d(0x0));
720 	} else if (p->gen < 070) {
721 		brw_set_dest(p, insn, brw_imm_w(0));
722 		insn->bits1.branch_gen6.jump_count = 0;
723 		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
724 		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
725 	} else {
726 		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
727 		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
728 		brw_set_src1(p, insn, brw_imm_ud(0));
729 		insn->bits3.break_cont.jip = 0;
730 		insn->bits3.break_cont.uip = 0;
731 	}
732 
733 	insn->header.execution_size = execute_size;
734 	insn->header.compression_control = BRW_COMPRESSION_NONE;
735 	insn->header.predicate_control = BRW_PREDICATE_NORMAL;
736 	insn->header.mask_control = BRW_MASK_ENABLE;
737 	if (!p->single_program_flow)
738 		insn->header.thread_control = BRW_THREAD_SWITCH;
739 
740 	p->current->header.predicate_control = BRW_PREDICATE_NONE;
741 
742 	push_if_stack(p, insn);
743 	return insn;
744 }
745 
746 /* This function is only used for gen6-style IF instructions with an
747  * embedded comparison (conditional modifier).  It is not used on gen7.
748  */
749 struct brw_instruction *
gen6_IF(struct brw_compile * p,uint32_t conditional,struct brw_reg src0,struct brw_reg src1)750 gen6_IF(struct brw_compile *p, uint32_t conditional,
751 	struct brw_reg src0, struct brw_reg src1)
752 {
753 	struct brw_instruction *insn;
754 
755 	insn = brw_next_insn(p, BRW_OPCODE_IF);
756 
757 	brw_set_dest(p, insn, brw_imm_w(0));
758 	if (p->compressed) {
759 		insn->header.execution_size = BRW_EXECUTE_16;
760 	} else {
761 		insn->header.execution_size = BRW_EXECUTE_8;
762 	}
763 	insn->bits1.branch_gen6.jump_count = 0;
764 	brw_set_src0(p, insn, src0);
765 	brw_set_src1(p, insn, src1);
766 
767 	assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
768 	assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
769 	insn->header.destreg__conditionalmod = conditional;
770 
771 	if (!p->single_program_flow)
772 		insn->header.thread_control = BRW_THREAD_SWITCH;
773 
774 	push_if_stack(p, insn);
775 	return insn;
776 }
777 
778 /**
779  * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
780  */
781 static void
convert_IF_ELSE_to_ADD(struct brw_compile * p,struct brw_instruction * if_inst,struct brw_instruction * else_inst)782 convert_IF_ELSE_to_ADD(struct brw_compile *p,
783 		       struct brw_instruction *if_inst,
784 		       struct brw_instruction *else_inst)
785 {
786 	/* The next instruction (where the ENDIF would be, if it existed) */
787 	struct brw_instruction *next_inst = &p->store[p->nr_insn];
788 
789 	assert(p->single_program_flow);
790 	assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
791 	assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
792 	assert(if_inst->header.execution_size == BRW_EXECUTE_1);
793 
794 	/* Convert IF to an ADD instruction that moves the instruction pointer
795 	 * to the first instruction of the ELSE block.  If there is no ELSE
796 	 * block, point to where ENDIF would be.  Reverse the predicate.
797 	 *
798 	 * There's no need to execute an ENDIF since we don't need to do any
799 	 * stack operations, and if we're currently executing, we just want to
800 	 * continue normally.
801 	 */
802 	if_inst->header.opcode = BRW_OPCODE_ADD;
803 	if_inst->header.predicate_inverse = 1;
804 
805 	if (else_inst != NULL) {
806 		/* Convert ELSE to an ADD instruction that points where the ENDIF
807 		 * would be.
808 		 */
809 		else_inst->header.opcode = BRW_OPCODE_ADD;
810 
811 		if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
812 		else_inst->bits3.ud = (next_inst - else_inst) * 16;
813 	} else {
814 		if_inst->bits3.ud = (next_inst - if_inst) * 16;
815 	}
816 }
817 
818 /**
819  * Patch IF and ELSE instructions with appropriate jump targets.
820  */
821 static void
patch_IF_ELSE(struct brw_compile * p,struct brw_instruction * if_inst,struct brw_instruction * else_inst,struct brw_instruction * endif_inst)822 patch_IF_ELSE(struct brw_compile *p,
823 	      struct brw_instruction *if_inst,
824 	      struct brw_instruction *else_inst,
825 	      struct brw_instruction *endif_inst)
826 {
827 	unsigned br = 1;
828 
829 	assert(!p->single_program_flow);
830 	assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
831 	assert(endif_inst != NULL);
832 	assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
833 
834 	/* Jump count is for 64bit data chunk each, so one 128bit instruction
835 	 * requires 2 chunks.
836 	 */
837 	if (p->gen >= 050)
838 		br = 2;
839 
840 	assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
841 	endif_inst->header.execution_size = if_inst->header.execution_size;
842 
843 	if (else_inst == NULL) {
844 		/* Patch IF -> ENDIF */
845 		if (p->gen < 060) {
846 			/* Turn it into an IFF, which means no mask stack operations for
847 			 * all-false and jumping past the ENDIF.
848 			 */
849 			if_inst->header.opcode = BRW_OPCODE_IFF;
850 			if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
851 			if_inst->bits3.if_else.pop_count = 0;
852 			if_inst->bits3.if_else.pad0 = 0;
853 		} else if (p->gen < 070) {
854 			/* As of gen6, there is no IFF and IF must point to the ENDIF. */
855 			if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
856 		} else {
857 			if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
858 			if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
859 		}
860 	} else {
861 		else_inst->header.execution_size = if_inst->header.execution_size;
862 
863 		/* Patch IF -> ELSE */
864 		if (p->gen < 060) {
865 			if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
866 			if_inst->bits3.if_else.pop_count = 0;
867 			if_inst->bits3.if_else.pad0 = 0;
868 		} else if (p->gen <= 070) {
869 			if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
870 		}
871 
872 		/* Patch ELSE -> ENDIF */
873 		if (p->gen < 060) {
874 			/* BRW_OPCODE_ELSE pre-gen6 should point just past the
875 			 * matching ENDIF.
876 			 */
877 			else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
878 			else_inst->bits3.if_else.pop_count = 1;
879 			else_inst->bits3.if_else.pad0 = 0;
880 		} else if (p->gen < 070) {
881 			/* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
882 			else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
883 		} else {
884 			/* The IF instruction's JIP should point just past the ELSE */
885 			if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
886 			/* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
887 			if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
888 			else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
889 		}
890 	}
891 }
892 
893 void
brw_ELSE(struct brw_compile * p)894 brw_ELSE(struct brw_compile *p)
895 {
896 	struct brw_instruction *insn;
897 
898 	insn = brw_next_insn(p, BRW_OPCODE_ELSE);
899 
900 	if (p->gen < 060) {
901 		brw_set_dest(p, insn, brw_ip_reg());
902 		brw_set_src0(p, insn, brw_ip_reg());
903 		brw_set_src1(p, insn, brw_imm_d(0x0));
904 	} else if (p->gen < 070) {
905 		brw_set_dest(p, insn, brw_imm_w(0));
906 		insn->bits1.branch_gen6.jump_count = 0;
907 		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
908 		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
909 	} else {
910 		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
911 		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
912 		brw_set_src1(p, insn, brw_imm_ud(0));
913 		insn->bits3.break_cont.jip = 0;
914 		insn->bits3.break_cont.uip = 0;
915 	}
916 
917 	insn->header.compression_control = BRW_COMPRESSION_NONE;
918 	insn->header.mask_control = BRW_MASK_ENABLE;
919 	if (!p->single_program_flow)
920 		insn->header.thread_control = BRW_THREAD_SWITCH;
921 
922 	push_if_stack(p, insn);
923 }
924 
925 void
brw_ENDIF(struct brw_compile * p)926 brw_ENDIF(struct brw_compile *p)
927 {
928 	struct brw_instruction *insn;
929 	struct brw_instruction *else_inst = NULL;
930 	struct brw_instruction *if_inst = NULL;
931 
932 	/* Pop the IF and (optional) ELSE instructions from the stack */
933 	p->if_stack_depth--;
934 	if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
935 		else_inst = p->if_stack[p->if_stack_depth];
936 		p->if_stack_depth--;
937 	}
938 	if_inst = p->if_stack[p->if_stack_depth];
939 
940 	if (p->single_program_flow) {
941 		/* ENDIF is useless; don't bother emitting it. */
942 		convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
943 		return;
944 	}
945 
946 	insn = brw_next_insn(p, BRW_OPCODE_ENDIF);
947 
948 	if (p->gen < 060) {
949 		brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
950 		brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
951 		brw_set_src1(p, insn, brw_imm_d(0x0));
952 	} else if (p->gen < 070) {
953 		brw_set_dest(p, insn, brw_imm_w(0));
954 		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
955 		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
956 	} else {
957 		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
958 		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
959 		brw_set_src1(p, insn, brw_imm_ud(0));
960 	}
961 
962 	insn->header.compression_control = BRW_COMPRESSION_NONE;
963 	insn->header.mask_control = BRW_MASK_ENABLE;
964 	insn->header.thread_control = BRW_THREAD_SWITCH;
965 
966 	/* Also pop item off the stack in the endif instruction: */
967 	if (p->gen < 060) {
968 		insn->bits3.if_else.jump_count = 0;
969 		insn->bits3.if_else.pop_count = 1;
970 		insn->bits3.if_else.pad0 = 0;
971 	} else if (p->gen < 070) {
972 		insn->bits1.branch_gen6.jump_count = 2;
973 	} else {
974 		insn->bits3.break_cont.jip = 2;
975 	}
976 	patch_IF_ELSE(p, if_inst, else_inst, insn);
977 }
978 
brw_BREAK(struct brw_compile * p,int pop_count)979 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
980 {
981 	struct brw_instruction *insn;
982 
983 	insn = brw_next_insn(p, BRW_OPCODE_BREAK);
984 	if (p->gen >= 060) {
985 		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
986 		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
987 		brw_set_src1(p, insn, brw_imm_d(0x0));
988 	} else {
989 		brw_set_dest(p, insn, brw_ip_reg());
990 		brw_set_src0(p, insn, brw_ip_reg());
991 		brw_set_src1(p, insn, brw_imm_d(0x0));
992 		insn->bits3.if_else.pad0 = 0;
993 		insn->bits3.if_else.pop_count = pop_count;
994 	}
995 	insn->header.compression_control = BRW_COMPRESSION_NONE;
996 	insn->header.execution_size = BRW_EXECUTE_8;
997 
998 	return insn;
999 }
1000 
gen6_CONT(struct brw_compile * p,struct brw_instruction * do_insn)1001 struct brw_instruction *gen6_CONT(struct brw_compile *p,
1002 				  struct brw_instruction *do_insn)
1003 {
1004 	struct brw_instruction *insn;
1005 
1006 	insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
1007 	brw_set_dest(p, insn, __retype_d(brw_null_reg()));
1008 	brw_set_src0(p, insn, __retype_d(brw_null_reg()));
1009 	brw_set_dest(p, insn, brw_ip_reg());
1010 	brw_set_src0(p, insn, brw_ip_reg());
1011 	brw_set_src1(p, insn, brw_imm_d(0x0));
1012 
1013 	insn->header.compression_control = BRW_COMPRESSION_NONE;
1014 	insn->header.execution_size = BRW_EXECUTE_8;
1015 	return insn;
1016 }
1017 
brw_CONT(struct brw_compile * p,int pop_count)1018 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1019 {
1020 	struct brw_instruction *insn;
1021 	insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
1022 	brw_set_dest(p, insn, brw_ip_reg());
1023 	brw_set_src0(p, insn, brw_ip_reg());
1024 	brw_set_src1(p, insn, brw_imm_d(0x0));
1025 	insn->header.compression_control = BRW_COMPRESSION_NONE;
1026 	insn->header.execution_size = BRW_EXECUTE_8;
1027 	/* insn->header.mask_control = BRW_MASK_DISABLE; */
1028 	insn->bits3.if_else.pad0 = 0;
1029 	insn->bits3.if_else.pop_count = pop_count;
1030 	return insn;
1031 }
1032 
1033 /* DO/WHILE loop:
1034  *
1035  * The DO/WHILE is just an unterminated loop -- break or continue are
1036  * used for control within the loop.  We have a few ways they can be
1037  * done.
1038  *
1039  * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1040  * jip and no DO instruction.
1041  *
1042  * For non-uniform control flow pre-gen6, there's a DO instruction to
1043  * push the mask, and a WHILE to jump back, and BREAK to get out and
1044  * pop the mask.
1045  *
1046  * For gen6, there's no more mask stack, so no need for DO.  WHILE
1047  * just points back to the first instruction of the loop.
1048  */
brw_DO(struct brw_compile * p,unsigned execute_size)1049 struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
1050 {
1051 	if (p->gen >= 060 || p->single_program_flow) {
1052 		return &p->store[p->nr_insn];
1053 	} else {
1054 		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO);
1055 
1056 		/* Override the defaults for this instruction:
1057 		*/
1058 		brw_set_dest(p, insn, brw_null_reg());
1059 		brw_set_src0(p, insn, brw_null_reg());
1060 		brw_set_src1(p, insn, brw_null_reg());
1061 
1062 		insn->header.compression_control = BRW_COMPRESSION_NONE;
1063 		insn->header.execution_size = execute_size;
1064 		insn->header.predicate_control = BRW_PREDICATE_NONE;
1065 		/* insn->header.mask_control = BRW_MASK_ENABLE; */
1066 		/* insn->header.mask_control = BRW_MASK_DISABLE; */
1067 
1068 		return insn;
1069 	}
1070 }
1071 
brw_WHILE(struct brw_compile * p,struct brw_instruction * do_insn)1072 struct brw_instruction *brw_WHILE(struct brw_compile *p,
1073                                   struct brw_instruction *do_insn)
1074 {
1075 	struct brw_instruction *insn;
1076 	unsigned br = 1;
1077 
1078 	if (p->gen >= 050)
1079 		br = 2;
1080 
1081 	if (p->gen >= 070) {
1082 		insn = brw_next_insn(p, BRW_OPCODE_WHILE);
1083 
1084 		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
1085 		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
1086 		brw_set_src1(p, insn, brw_imm_ud(0));
1087 		insn->bits3.break_cont.jip = br * (do_insn - insn);
1088 
1089 		insn->header.execution_size = BRW_EXECUTE_8;
1090 	} else if (p->gen >= 060) {
1091 		insn = brw_next_insn(p, BRW_OPCODE_WHILE);
1092 
1093 		brw_set_dest(p, insn, brw_imm_w(0));
1094 		insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
1095 		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
1096 		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
1097 
1098 		insn->header.execution_size = BRW_EXECUTE_8;
1099 	} else {
1100 		if (p->single_program_flow) {
1101 			insn = brw_next_insn(p, BRW_OPCODE_ADD);
1102 
1103 			brw_set_dest(p, insn, brw_ip_reg());
1104 			brw_set_src0(p, insn, brw_ip_reg());
1105 			brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
1106 			insn->header.execution_size = BRW_EXECUTE_1;
1107 		} else {
1108 			insn = brw_next_insn(p, BRW_OPCODE_WHILE);
1109 
1110 			assert(do_insn->header.opcode == BRW_OPCODE_DO);
1111 
1112 			brw_set_dest(p, insn, brw_ip_reg());
1113 			brw_set_src0(p, insn, brw_ip_reg());
1114 			brw_set_src1(p, insn, brw_imm_d(0));
1115 
1116 			insn->header.execution_size = do_insn->header.execution_size;
1117 			insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1118 			insn->bits3.if_else.pop_count = 0;
1119 			insn->bits3.if_else.pad0 = 0;
1120 		}
1121 	}
1122 	insn->header.compression_control = BRW_COMPRESSION_NONE;
1123 	p->current->header.predicate_control = BRW_PREDICATE_NONE;
1124 
1125 	return insn;
1126 }
1127 
1128 /* FORWARD JUMPS:
1129  */
brw_land_fwd_jump(struct brw_compile * p,struct brw_instruction * jmp_insn)1130 void brw_land_fwd_jump(struct brw_compile *p,
1131 		       struct brw_instruction *jmp_insn)
1132 {
1133 	struct brw_instruction *landing = &p->store[p->nr_insn];
1134 	unsigned jmpi = 1;
1135 
1136 	if (p->gen >= 050)
1137 		jmpi = 2;
1138 
1139 	assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1140 	assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1141 
1142 	jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1143 }
1144 
1145 
1146 
1147 /* To integrate with the above, it makes sense that the comparison
1148  * instruction should populate the flag register.  It might be simpler
1149  * just to use the flag reg for most WM tasks?
1150  */
brw_CMP(struct brw_compile * p,struct brw_reg dest,unsigned conditional,struct brw_reg src0,struct brw_reg src1)1151 void brw_CMP(struct brw_compile *p,
1152 	     struct brw_reg dest,
1153 	     unsigned conditional,
1154 	     struct brw_reg src0,
1155 	     struct brw_reg src1)
1156 {
1157 	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_CMP);
1158 
1159 	insn->header.destreg__conditionalmod = conditional;
1160 	brw_set_dest(p, insn, dest);
1161 	brw_set_src0(p, insn, src0);
1162 	brw_set_src1(p, insn, src1);
1163 
1164 	/* Make it so that future instructions will use the computed flag
1165 	 * value until brw_set_predicate_control_flag_value() is called
1166 	 * again.
1167 	 */
1168 	if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1169 	    dest.nr == 0) {
1170 		p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1171 		p->flag_value = 0xff;
1172 	}
1173 }
1174 
1175 /* Issue 'wait' instruction for n1, host could program MMIO
1176    to wake up thread. */
brw_WAIT(struct brw_compile * p)1177 void brw_WAIT(struct brw_compile *p)
1178 {
1179 	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT);
1180 	struct brw_reg src = brw_notification_1_reg();
1181 
1182 	brw_set_dest(p, insn, src);
1183 	brw_set_src0(p, insn, src);
1184 	brw_set_src1(p, insn, brw_null_reg());
1185 	insn->header.execution_size = 0; /* must */
1186 	insn->header.predicate_control = 0;
1187 	insn->header.compression_control = 0;
1188 }
1189 
1190 /***********************************************************************
1191  * Helpers for the various SEND message types:
1192  */
1193 
1194 /** Extended math function, float[8].
1195  */
brw_math(struct brw_compile * p,struct brw_reg dest,unsigned function,unsigned saturate,unsigned msg_reg_nr,struct brw_reg src,unsigned data_type,unsigned precision)1196 void brw_math(struct brw_compile *p,
1197 	      struct brw_reg dest,
1198 	      unsigned function,
1199 	      unsigned saturate,
1200 	      unsigned msg_reg_nr,
1201 	      struct brw_reg src,
1202 	      unsigned data_type,
1203 	      unsigned precision)
1204 {
1205 	if (p->gen >= 060) {
1206 		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
1207 
1208 		assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1209 		assert(src.file == BRW_GENERAL_REGISTER_FILE);
1210 
1211 		assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1212 		assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1213 
1214 		/* Source modifiers are ignored for extended math instructions. */
1215 		assert(!src.negate);
1216 		assert(!src.abs);
1217 
1218 		if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1219 		    function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1220 			assert(src.type == BRW_REGISTER_TYPE_F);
1221 		}
1222 
1223 		/* Math is the same ISA format as other opcodes, except that CondModifier
1224 		 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1225 		 */
1226 		insn->header.destreg__conditionalmod = function;
1227 		insn->header.saturate = saturate;
1228 
1229 		brw_set_dest(p, insn, dest);
1230 		brw_set_src0(p, insn, src);
1231 		brw_set_src1(p, insn, brw_null_reg());
1232 	} else {
1233 		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
1234 		/* Example code doesn't set predicate_control for send
1235 		 * instructions.
1236 		 */
1237 		insn->header.predicate_control = 0;
1238 		insn->header.destreg__conditionalmod = msg_reg_nr;
1239 
1240 		brw_set_dest(p, insn, dest);
1241 		brw_set_src0(p, insn, src);
1242 		brw_set_math_message(p, insn, function,
1243 				     src.type == BRW_REGISTER_TYPE_D,
1244 				     precision,
1245 				     saturate,
1246 				     data_type);
1247 	}
1248 }
1249 
1250 /** Extended math function, float[8].
1251  */
brw_math2(struct brw_compile * p,struct brw_reg dest,unsigned function,struct brw_reg src0,struct brw_reg src1)1252 void brw_math2(struct brw_compile *p,
1253 	       struct brw_reg dest,
1254 	       unsigned function,
1255 	       struct brw_reg src0,
1256 	       struct brw_reg src1)
1257 {
1258 	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
1259 
1260 	assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1261 	assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1262 	assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1263 
1264 	assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1265 	assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1266 	assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1267 
1268 	if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1269 	    function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1270 		assert(src0.type == BRW_REGISTER_TYPE_F);
1271 		assert(src1.type == BRW_REGISTER_TYPE_F);
1272 	}
1273 
1274 	/* Source modifiers are ignored for extended math instructions. */
1275 	assert(!src0.negate);
1276 	assert(!src0.abs);
1277 	assert(!src1.negate);
1278 	assert(!src1.abs);
1279 
1280 	/* Math is the same ISA format as other opcodes, except that CondModifier
1281 	 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1282 	 */
1283 	insn->header.destreg__conditionalmod = function;
1284 
1285 	brw_set_dest(p, insn, dest);
1286 	brw_set_src0(p, insn, src0);
1287 	brw_set_src1(p, insn, src1);
1288 }
1289 
1290 /**
1291  * Extended math function, float[16].
1292  * Use 2 send instructions.
1293  */
brw_math_16(struct brw_compile * p,struct brw_reg dest,unsigned function,unsigned saturate,unsigned msg_reg_nr,struct brw_reg src,unsigned precision)1294 void brw_math_16(struct brw_compile *p,
1295 		 struct brw_reg dest,
1296 		 unsigned function,
1297 		 unsigned saturate,
1298 		 unsigned msg_reg_nr,
1299 		 struct brw_reg src,
1300 		 unsigned precision)
1301 {
1302 	struct brw_instruction *insn;
1303 
1304 	if (p->gen >= 060) {
1305 		insn = brw_next_insn(p, BRW_OPCODE_MATH);
1306 
1307 		/* Math is the same ISA format as other opcodes, except that CondModifier
1308 		 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1309 		 */
1310 		insn->header.destreg__conditionalmod = function;
1311 		insn->header.saturate = saturate;
1312 
1313 		/* Source modifiers are ignored for extended math instructions. */
1314 		assert(!src.negate);
1315 		assert(!src.abs);
1316 
1317 		brw_set_dest(p, insn, dest);
1318 		brw_set_src0(p, insn, src);
1319 		brw_set_src1(p, insn, brw_null_reg());
1320 		return;
1321 	}
1322 
1323 	/* First instruction:
1324 	*/
1325 	brw_push_insn_state(p);
1326 	brw_set_predicate_control_flag_value(p, 0xff);
1327 	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1328 
1329 	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1330 	insn->header.destreg__conditionalmod = msg_reg_nr;
1331 
1332 	brw_set_dest(p, insn, dest);
1333 	brw_set_src0(p, insn, src);
1334 	brw_set_math_message(p, insn, function,
1335 			     BRW_MATH_INTEGER_UNSIGNED,
1336 			     precision,
1337 			     saturate,
1338 			     BRW_MATH_DATA_VECTOR);
1339 
1340 	/* Second instruction:
1341 	*/
1342 	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1343 	insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1344 	insn->header.destreg__conditionalmod = msg_reg_nr+1;
1345 
1346 	brw_set_dest(p, insn, __offset(dest,1));
1347 	brw_set_src0(p, insn, src);
1348 	brw_set_math_message(p, insn, function,
1349 			     BRW_MATH_INTEGER_UNSIGNED,
1350 			     precision,
1351 			     saturate,
1352 			     BRW_MATH_DATA_VECTOR);
1353 
1354 	brw_pop_insn_state(p);
1355 }
1356 
1357 /**
1358  * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1359  * using a constant offset per channel.
1360  *
1361  * The offset must be aligned to oword size (16 bytes).  Used for
1362  * register spilling.
1363  */
brw_oword_block_write_scratch(struct brw_compile * p,struct brw_reg mrf,int num_regs,unsigned offset)1364 void brw_oword_block_write_scratch(struct brw_compile *p,
1365 				   struct brw_reg mrf,
1366 				   int num_regs,
1367 				   unsigned offset)
1368 {
1369 	uint32_t msg_control, msg_type;
1370 	int mlen;
1371 
1372 	if (p->gen >= 060)
1373 		offset /= 16;
1374 
1375 	mrf = __retype_ud(mrf);
1376 
1377 	if (num_regs == 1) {
1378 		msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1379 		mlen = 2;
1380 	} else {
1381 		msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1382 		mlen = 3;
1383 	}
1384 
1385 	/* Set up the message header.  This is g0, with g0.2 filled with
1386 	 * the offset.  We don't want to leave our offset around in g0 or
1387 	 * it'll screw up texture samples, so set it up inside the message
1388 	 * reg.
1389 	 */
1390 	{
1391 		brw_push_insn_state(p);
1392 		brw_set_mask_control(p, BRW_MASK_DISABLE);
1393 		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1394 
1395 		brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1396 
1397 		/* set message header global offset field (reg 0, element 2) */
1398 		brw_MOV(p,
1399 			__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
1400 			brw_imm_ud(offset));
1401 
1402 		brw_pop_insn_state(p);
1403 	}
1404 
1405 	{
1406 		struct brw_reg dest;
1407 		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
1408 		int send_commit_msg;
1409 		struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0));
1410 
1411 		if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
1412 			insn->header.compression_control = BRW_COMPRESSION_NONE;
1413 			src_header = vec16(src_header);
1414 		}
1415 		assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1416 		insn->header.destreg__conditionalmod = mrf.nr;
1417 
1418 		/* Until gen6, writes followed by reads from the same location
1419 		 * are not guaranteed to be ordered unless write_commit is set.
1420 		 * If set, then a no-op write is issued to the destination
1421 		 * register to set a dependency, and a read from the destination
1422 		 * can be used to ensure the ordering.
1423 		 *
1424 		 * For gen6, only writes between different threads need ordering
1425 		 * protection.  Our use of DP writes is all about register
1426 		 * spilling within a thread.
1427 		 */
1428 		if (p->gen >= 060) {
1429 			dest = __retype_uw(vec16(brw_null_reg()));
1430 			send_commit_msg = 0;
1431 		} else {
1432 			dest = src_header;
1433 			send_commit_msg = 1;
1434 		}
1435 
1436 		brw_set_dest(p, insn, dest);
1437 		if (p->gen >= 060) {
1438 			brw_set_src0(p, insn, mrf);
1439 		} else {
1440 			brw_set_src0(p, insn, brw_null_reg());
1441 		}
1442 
1443 		if (p->gen >= 060)
1444 			msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1445 		else
1446 			msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1447 
1448 		brw_set_dp_write_message(p,
1449 					 insn,
1450 					 255, /* binding table index (255=stateless) */
1451 					 msg_control,
1452 					 msg_type,
1453 					 mlen,
1454 					 true, /* header_present */
1455 					 0, /* pixel scoreboard */
1456 					 send_commit_msg, /* response_length */
1457 					 0, /* eot */
1458 					 send_commit_msg);
1459 	}
1460 }
1461 
1462 
1463 /**
1464  * Read a block of owords (half a GRF each) from the scratch buffer
1465  * using a constant index per channel.
1466  *
1467  * Offset must be aligned to oword size (16 bytes).  Used for register
1468  * spilling.
1469  */
1470 void
brw_oword_block_read_scratch(struct brw_compile * p,struct brw_reg dest,struct brw_reg mrf,int num_regs,unsigned offset)1471 brw_oword_block_read_scratch(struct brw_compile *p,
1472 			     struct brw_reg dest,
1473 			     struct brw_reg mrf,
1474 			     int num_regs,
1475 			     unsigned offset)
1476 {
1477 	uint32_t msg_control;
1478 	int rlen;
1479 
1480 	if (p->gen >= 060)
1481 		offset /= 16;
1482 
1483 	mrf = __retype_ud(mrf);
1484 	dest = __retype_uw(dest);
1485 
1486 	if (num_regs == 1) {
1487 		msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1488 		rlen = 1;
1489 	} else {
1490 		msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1491 		rlen = 2;
1492 	}
1493 
1494 	{
1495 		brw_push_insn_state(p);
1496 		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1497 		brw_set_mask_control(p, BRW_MASK_DISABLE);
1498 
1499 		brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1500 
1501 		/* set message header global offset field (reg 0, element 2) */
1502 		brw_MOV(p,
1503 			__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
1504 			brw_imm_ud(offset));
1505 
1506 		brw_pop_insn_state(p);
1507 	}
1508 
1509 	{
1510 		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
1511 
1512 		assert(insn->header.predicate_control == 0);
1513 		insn->header.compression_control = BRW_COMPRESSION_NONE;
1514 		insn->header.destreg__conditionalmod = mrf.nr;
1515 
1516 		brw_set_dest(p, insn, dest); /* UW? */
1517 		if (p->gen >= 060) {
1518 			brw_set_src0(p, insn, mrf);
1519 		} else {
1520 			brw_set_src0(p, insn, brw_null_reg());
1521 		}
1522 
1523 		brw_set_dp_read_message(p,
1524 					insn,
1525 					255, /* binding table index (255=stateless) */
1526 					msg_control,
1527 					BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1528 					BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1529 					1, /* msg_length */
1530 					rlen);
1531 	}
1532 }
1533 
1534 /**
1535  * Read a float[4] vector from the data port Data Cache (const buffer).
1536  * Location (in buffer) should be a multiple of 16.
1537  * Used for fetching shader constants.
1538  */
brw_oword_block_read(struct brw_compile * p,struct brw_reg dest,struct brw_reg mrf,uint32_t offset,uint32_t bind_table_index)1539 void brw_oword_block_read(struct brw_compile *p,
1540 			  struct brw_reg dest,
1541 			  struct brw_reg mrf,
1542 			  uint32_t offset,
1543 			  uint32_t bind_table_index)
1544 {
1545 	struct brw_instruction *insn;
1546 
1547 	/* On newer hardware, offset is in units of owords. */
1548 	if (p->gen >= 060)
1549 		offset /= 16;
1550 
1551 	mrf = __retype_ud(mrf);
1552 
1553 	brw_push_insn_state(p);
1554 	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1555 	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1556 	brw_set_mask_control(p, BRW_MASK_DISABLE);
1557 
1558 	brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1559 
1560 	/* set message header global offset field (reg 0, element 2) */
1561 	brw_MOV(p,
1562 		__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
1563 		brw_imm_ud(offset));
1564 
1565 	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1566 	insn->header.destreg__conditionalmod = mrf.nr;
1567 
1568 	/* cast dest to a uword[8] vector */
1569 	dest = __retype_uw(vec8(dest));
1570 
1571 	brw_set_dest(p, insn, dest);
1572 	if (p->gen >= 060) {
1573 		brw_set_src0(p, insn, mrf);
1574 	} else {
1575 		brw_set_src0(p, insn, brw_null_reg());
1576 	}
1577 
1578 	brw_set_dp_read_message(p,
1579 				insn,
1580 				bind_table_index,
1581 				BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
1582 				BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
1583 				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1584 				1, /* msg_length */
1585 				1); /* response_length (1 reg, 2 owords!) */
1586 
1587 	brw_pop_insn_state(p);
1588 }
1589 
1590 /**
1591  * Read a set of dwords from the data port Data Cache (const buffer).
1592  *
1593  * Location (in buffer) appears as UD offsets in the register after
1594  * the provided mrf header reg.
1595  */
brw_dword_scattered_read(struct brw_compile * p,struct brw_reg dest,struct brw_reg mrf,uint32_t bind_table_index)1596 void brw_dword_scattered_read(struct brw_compile *p,
1597 			      struct brw_reg dest,
1598 			      struct brw_reg mrf,
1599 			      uint32_t bind_table_index)
1600 {
1601 	struct brw_instruction *insn;
1602 
1603 	mrf = __retype_ud(mrf);
1604 
1605 	brw_push_insn_state(p);
1606 	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1607 	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1608 	brw_set_mask_control(p, BRW_MASK_DISABLE);
1609 	brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1610 	brw_pop_insn_state(p);
1611 
1612 	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1613 	insn->header.destreg__conditionalmod = mrf.nr;
1614 
1615 	/* cast dest to a uword[8] vector */
1616 	dest = __retype_uw(vec8(dest));
1617 
1618 	brw_set_dest(p, insn, dest);
1619 	brw_set_src0(p, insn, brw_null_reg());
1620 
1621 	brw_set_dp_read_message(p,
1622 				insn,
1623 				bind_table_index,
1624 				BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
1625 				BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
1626 				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1627 				2, /* msg_length */
1628 				1); /* response_length */
1629 }
1630 
1631 /**
1632  * Read float[4] constant(s) from VS constant buffer.
1633  * For relative addressing, two float[4] constants will be read into 'dest'.
1634  * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1635  */
brw_dp_READ_4_vs(struct brw_compile * p,struct brw_reg dest,unsigned location,unsigned bind_table_index)1636 void brw_dp_READ_4_vs(struct brw_compile *p,
1637                       struct brw_reg dest,
1638                       unsigned location,
1639                       unsigned bind_table_index)
1640 {
1641 	struct brw_instruction *insn;
1642 	unsigned msg_reg_nr = 1;
1643 
1644 	if (p->gen >= 060)
1645 		location /= 16;
1646 
1647 	/* Setup MRF[1] with location/offset into const buffer */
1648 	brw_push_insn_state(p);
1649 	brw_set_access_mode(p, BRW_ALIGN_1);
1650 	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1651 	brw_set_mask_control(p, BRW_MASK_DISABLE);
1652 	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1653 	brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)),
1654 		brw_imm_ud(location));
1655 	brw_pop_insn_state(p);
1656 
1657 	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1658 
1659 	insn->header.predicate_control = BRW_PREDICATE_NONE;
1660 	insn->header.compression_control = BRW_COMPRESSION_NONE;
1661 	insn->header.destreg__conditionalmod = msg_reg_nr;
1662 	insn->header.mask_control = BRW_MASK_DISABLE;
1663 
1664 	brw_set_dest(p, insn, dest);
1665 	if (p->gen >= 060) {
1666 		brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
1667 	} else {
1668 		brw_set_src0(p, insn, brw_null_reg());
1669 	}
1670 
1671 	brw_set_dp_read_message(p,
1672 				insn,
1673 				bind_table_index,
1674 				0,
1675 				BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1676 				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1677 				1, /* msg_length */
1678 				1); /* response_length (1 Oword) */
1679 }
1680 
1681 /**
1682  * Read a float[4] constant per vertex from VS constant buffer, with
1683  * relative addressing.
1684  */
brw_dp_READ_4_vs_relative(struct brw_compile * p,struct brw_reg dest,struct brw_reg addr_reg,unsigned offset,unsigned bind_table_index)1685 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1686 			       struct brw_reg dest,
1687 			       struct brw_reg addr_reg,
1688 			       unsigned offset,
1689 			       unsigned bind_table_index)
1690 {
1691 	struct brw_reg src = brw_vec8_grf(0, 0);
1692 	struct brw_instruction *insn;
1693 	int msg_type;
1694 
1695 	/* Setup MRF[1] with offset into const buffer */
1696 	brw_push_insn_state(p);
1697 	brw_set_access_mode(p, BRW_ALIGN_1);
1698 	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1699 	brw_set_mask_control(p, BRW_MASK_DISABLE);
1700 	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1701 
1702 	/* M1.0 is block offset 0, M1.4 is block offset 1, all other
1703 	 * fields ignored.
1704 	 */
1705 	brw_ADD(p, __retype_d(brw_message_reg(1)),
1706 		addr_reg, brw_imm_d(offset));
1707 	brw_pop_insn_state(p);
1708 
1709 	gen6_resolve_implied_move(p, &src, 0);
1710 
1711 	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1712 	insn->header.predicate_control = BRW_PREDICATE_NONE;
1713 	insn->header.compression_control = BRW_COMPRESSION_NONE;
1714 	insn->header.destreg__conditionalmod = 0;
1715 	insn->header.mask_control = BRW_MASK_DISABLE;
1716 
1717 	brw_set_dest(p, insn, dest);
1718 	brw_set_src0(p, insn, src);
1719 
1720 	if (p->gen >= 060)
1721 		msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1722 	else if (p->gen >= 045)
1723 		msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1724 	else
1725 		msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1726 
1727 	brw_set_dp_read_message(p,
1728 				insn,
1729 				bind_table_index,
1730 				BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1731 				msg_type,
1732 				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1733 				2, /* msg_length */
1734 				1); /* response_length */
1735 }
1736 
brw_fb_WRITE(struct brw_compile * p,int dispatch_width,unsigned msg_reg_nr,struct brw_reg src0,unsigned msg_control,unsigned binding_table_index,unsigned msg_length,unsigned response_length,bool eot,bool header_present)1737 void brw_fb_WRITE(struct brw_compile *p,
1738 		  int dispatch_width,
1739                   unsigned msg_reg_nr,
1740                   struct brw_reg src0,
1741                   unsigned msg_control,
1742                   unsigned binding_table_index,
1743                   unsigned msg_length,
1744                   unsigned response_length,
1745                   bool eot,
1746                   bool header_present)
1747 {
1748 	struct brw_instruction *insn;
1749 	unsigned msg_type;
1750 	struct brw_reg dest;
1751 
1752 	if (dispatch_width == 16)
1753 		dest = __retype_uw(vec16(brw_null_reg()));
1754 	else
1755 		dest = __retype_uw(vec8(brw_null_reg()));
1756 
1757 	if (p->gen >= 060 && binding_table_index == 0) {
1758 		insn = brw_next_insn(p, BRW_OPCODE_SENDC);
1759 	} else {
1760 		insn = brw_next_insn(p, BRW_OPCODE_SEND);
1761 	}
1762 	/* The execution mask is ignored for render target writes. */
1763 	insn->header.predicate_control = 0;
1764 	insn->header.compression_control = BRW_COMPRESSION_NONE;
1765 
1766 	if (p->gen >= 060) {
1767 		/* headerless version, just submit color payload */
1768 		src0 = brw_message_reg(msg_reg_nr);
1769 
1770 		msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1771 	} else {
1772 		insn->header.destreg__conditionalmod = msg_reg_nr;
1773 
1774 		msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1775 	}
1776 
1777 	brw_set_dest(p, insn, dest);
1778 	brw_set_src0(p, insn, src0);
1779 	brw_set_dp_write_message(p,
1780 				 insn,
1781 				 binding_table_index,
1782 				 msg_control,
1783 				 msg_type,
1784 				 msg_length,
1785 				 header_present,
1786 				 eot,
1787 				 response_length,
1788 				 eot,
1789 				 0 /* send_commit_msg */);
1790 }
1791 
1792 /**
1793  * Texture sample instruction.
1794  * Note: the msg_type plus msg_length values determine exactly what kind
1795  * of sampling operation is performed.  See volume 4, page 161 of docs.
1796  */
brw_SAMPLE(struct brw_compile * p,struct brw_reg dest,unsigned msg_reg_nr,struct brw_reg src0,unsigned binding_table_index,unsigned sampler,unsigned writemask,unsigned msg_type,unsigned response_length,unsigned msg_length,bool header_present,unsigned simd_mode)1797 void brw_SAMPLE(struct brw_compile *p,
1798 		struct brw_reg dest,
1799 		unsigned msg_reg_nr,
1800 		struct brw_reg src0,
1801 		unsigned binding_table_index,
1802 		unsigned sampler,
1803 		unsigned writemask,
1804 		unsigned msg_type,
1805 		unsigned response_length,
1806 		unsigned msg_length,
1807 		bool header_present,
1808 		unsigned simd_mode)
1809 {
1810 	assert(writemask);
1811 
1812 	if (p->gen < 050 || writemask != WRITEMASK_XYZW) {
1813 		struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1814 
1815 		writemask = ~writemask & WRITEMASK_XYZW;
1816 
1817 		brw_push_insn_state(p);
1818 
1819 		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1820 		brw_set_mask_control(p, BRW_MASK_DISABLE);
1821 
1822 		brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0)));
1823 		brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12));
1824 
1825 		brw_pop_insn_state(p);
1826 
1827 		src0 = __retype_uw(brw_null_reg());
1828 	}
1829 
1830 	{
1831 		struct brw_instruction *insn;
1832 
1833 		gen6_resolve_implied_move(p, &src0, msg_reg_nr);
1834 
1835 		insn = brw_next_insn(p, BRW_OPCODE_SEND);
1836 		insn->header.predicate_control = 0; /* XXX */
1837 		insn->header.compression_control = BRW_COMPRESSION_NONE;
1838 		if (p->gen < 060)
1839 			insn->header.destreg__conditionalmod = msg_reg_nr;
1840 
1841 		brw_set_dest(p, insn, dest);
1842 		brw_set_src0(p, insn, src0);
1843 		brw_set_sampler_message(p, insn,
1844 					binding_table_index,
1845 					sampler,
1846 					msg_type,
1847 					response_length,
1848 					msg_length,
1849 					header_present,
1850 					simd_mode);
1851 	}
1852 }
1853 
1854 /* All these variables are pretty confusing - we might be better off
1855  * using bitmasks and macros for this, in the old style.  Or perhaps
1856  * just having the caller instantiate the fields in dword3 itself.
1857  */
brw_urb_WRITE(struct brw_compile * p,struct brw_reg dest,unsigned msg_reg_nr,struct brw_reg src0,bool allocate,bool used,unsigned msg_length,unsigned response_length,bool eot,bool writes_complete,unsigned offset,unsigned swizzle)1858 void brw_urb_WRITE(struct brw_compile *p,
1859 		   struct brw_reg dest,
1860 		   unsigned msg_reg_nr,
1861 		   struct brw_reg src0,
1862 		   bool allocate,
1863 		   bool used,
1864 		   unsigned msg_length,
1865 		   unsigned response_length,
1866 		   bool eot,
1867 		   bool writes_complete,
1868 		   unsigned offset,
1869 		   unsigned swizzle)
1870 {
1871 	struct brw_instruction *insn;
1872 
1873 	gen6_resolve_implied_move(p, &src0, msg_reg_nr);
1874 
1875 	if (p->gen >= 070) {
1876 		/* Enable Channel Masks in the URB_WRITE_HWORD message header */
1877 		brw_push_insn_state(p);
1878 		brw_set_access_mode(p, BRW_ALIGN_1);
1879 		brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)),
1880 		       __retype_ud(brw_vec1_grf(0, 5)),
1881 		       brw_imm_ud(0xff00));
1882 		brw_pop_insn_state(p);
1883 	}
1884 
1885 	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1886 
1887 	assert(msg_length < BRW_MAX_MRF);
1888 
1889 	brw_set_dest(p, insn, dest);
1890 	brw_set_src0(p, insn, src0);
1891 	brw_set_src1(p, insn, brw_imm_d(0));
1892 
1893 	if (p->gen <= 060)
1894 		insn->header.destreg__conditionalmod = msg_reg_nr;
1895 
1896 	brw_set_urb_message(p,
1897 			    insn,
1898 			    allocate,
1899 			    used,
1900 			    msg_length,
1901 			    response_length,
1902 			    eot,
1903 			    writes_complete,
1904 			    offset,
1905 			    swizzle);
1906 }
1907 
1908 static int
brw_find_next_block_end(struct brw_compile * p,int start)1909 brw_find_next_block_end(struct brw_compile *p, int start)
1910 {
1911 	int ip;
1912 
1913 	for (ip = start + 1; ip < p->nr_insn; ip++) {
1914 		struct brw_instruction *insn = &p->store[ip];
1915 
1916 		switch (insn->header.opcode) {
1917 		case BRW_OPCODE_ENDIF:
1918 		case BRW_OPCODE_ELSE:
1919 		case BRW_OPCODE_WHILE:
1920 			return ip;
1921 		}
1922 	}
1923 	assert(!"not reached");
1924 	return start + 1;
1925 }
1926 
1927 /* There is no DO instruction on gen6, so to find the end of the loop
1928  * we have to see if the loop is jumping back before our start
1929  * instruction.
1930  */
1931 static int
brw_find_loop_end(struct brw_compile * p,int start)1932 brw_find_loop_end(struct brw_compile *p, int start)
1933 {
1934 	int ip;
1935 	int br = 2;
1936 
1937 	for (ip = start + 1; ip < p->nr_insn; ip++) {
1938 		struct brw_instruction *insn = &p->store[ip];
1939 
1940 		if (insn->header.opcode == BRW_OPCODE_WHILE) {
1941 			int jip = p->gen <= 070 ? insn->bits1.branch_gen6.jump_count
1942 				: insn->bits3.break_cont.jip;
1943 			if (ip + jip / br <= start)
1944 				return ip;
1945 		}
1946 	}
1947 	assert(!"not reached");
1948 	return start + 1;
1949 }
1950 
1951 /* After program generation, go back and update the UIP and JIP of
1952  * BREAK and CONT instructions to their correct locations.
1953  */
1954 void
brw_set_uip_jip(struct brw_compile * p)1955 brw_set_uip_jip(struct brw_compile *p)
1956 {
1957 	int ip;
1958 	int br = 2;
1959 
1960 	if (p->gen <= 060)
1961 		return;
1962 
1963 	for (ip = 0; ip < p->nr_insn; ip++) {
1964 		struct brw_instruction *insn = &p->store[ip];
1965 
1966 		switch (insn->header.opcode) {
1967 		case BRW_OPCODE_BREAK:
1968 			insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
1969 			/* Gen7 UIP points to WHILE; Gen6 points just after it */
1970 			insn->bits3.break_cont.uip =
1971 				br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 070 ? 1 : 0));
1972 			break;
1973 		case BRW_OPCODE_CONTINUE:
1974 			insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
1975 			insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
1976 
1977 			assert(insn->bits3.break_cont.uip != 0);
1978 			assert(insn->bits3.break_cont.jip != 0);
1979 			break;
1980 		}
1981 	}
1982 }
1983 
brw_ff_sync(struct brw_compile * p,struct brw_reg dest,unsigned msg_reg_nr,struct brw_reg src0,bool allocate,unsigned response_length,bool eot)1984 void brw_ff_sync(struct brw_compile *p,
1985 		   struct brw_reg dest,
1986 		   unsigned msg_reg_nr,
1987 		   struct brw_reg src0,
1988 		   bool allocate,
1989 		   unsigned response_length,
1990 		   bool eot)
1991 {
1992 	struct brw_instruction *insn;
1993 
1994 	gen6_resolve_implied_move(p, &src0, msg_reg_nr);
1995 
1996 	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1997 	brw_set_dest(p, insn, dest);
1998 	brw_set_src0(p, insn, src0);
1999 	brw_set_src1(p, insn, brw_imm_d(0));
2000 
2001 	if (p->gen < 060)
2002 		insn->header.destreg__conditionalmod = msg_reg_nr;
2003 
2004 	brw_set_ff_sync_message(p,
2005 				insn,
2006 				allocate,
2007 				response_length,
2008 				eot);
2009 }
2010