1 /*
2    Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3    Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4    develop this 3D driver.
5 
6    Permission is hereby granted, free of charge, to any person obtaining
7    a copy of this software and associated documentation files (the
8    "Software"), to deal in the Software without restriction, including
9    without limitation the rights to use, copy, modify, merge, publish,
10    distribute, sublicense, and/or sell copies of the Software, and to
11    permit persons to whom the Software is furnished to do so, subject to
12    the following conditions:
13 
14    The above copyright notice and this permission notice (including the
15    next paragraph) shall be included in all copies or substantial
16    portions of the Software.
17 
18    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21    IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23    OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31 
32 #include "cairoint.h"
33 #include "cairo-drm-intel-brw-eu.h"
34 
35 #include <string.h>
36 
37 /***********************************************************************
38  * Internal helper for constructing instructions
39  */
40 
guess_execution_size(struct brw_instruction * insn,struct brw_reg reg)41 static void guess_execution_size( struct brw_instruction *insn,
42 				  struct brw_reg reg )
43 {
44     if (reg.width == BRW_WIDTH_8 &&
45 	insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
46 	insn->header.execution_size = BRW_EXECUTE_16;
47     else
48 	insn->header.execution_size = reg.width;	/* note - definitions are compatible */
49 }
50 
51 
52 void
brw_instruction_set_destination(struct brw_instruction * insn,struct brw_reg dest)53 brw_instruction_set_destination (struct brw_instruction *insn,
54 				 struct brw_reg dest)
55 {
56     insn->bits1.da1.dest_reg_file = dest.file;
57     insn->bits1.da1.dest_reg_type = dest.type;
58     insn->bits1.da1.dest_address_mode = dest.address_mode;
59 
60     if (dest.address_mode == BRW_ADDRESS_DIRECT) {
61 	insn->bits1.da1.dest_reg_nr = dest.nr;
62 
63 	if (insn->header.access_mode == BRW_ALIGN_1) {
64 	    insn->bits1.da1.dest_subreg_nr = dest.subnr;
65 	    if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
66 		dest.hstride = BRW_HORIZONTAL_STRIDE_1;
67 	    insn->bits1.da1.dest_horiz_stride = dest.hstride;
68 	} else {
69 	    insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
70 	    insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
71 	}
72     } else {
73 	insn->bits1.ia1.dest_subreg_nr = dest.subnr;
74 
75 	/* These are different sizes in align1 vs align16:
76 	*/
77 	if (insn->header.access_mode == BRW_ALIGN_1) {
78 	    insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
79 	    if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
80 		dest.hstride = BRW_HORIZONTAL_STRIDE_1;
81 	    insn->bits1.ia1.dest_horiz_stride = dest.hstride;
82 	} else {
83 	    insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
84 	}
85     }
86 
87     /* NEW: Set the execution size based on dest.width and
88      * insn->compression_control:
89      */
90     guess_execution_size(insn, dest);
91 }
92 
93 void
brw_instruction_set_source0(struct brw_instruction * insn,struct brw_reg reg)94 brw_instruction_set_source0 (struct brw_instruction *insn,
95 			     struct brw_reg reg)
96 {
97     assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
98 
99     insn->bits1.da1.src0_reg_file = reg.file;
100     insn->bits1.da1.src0_reg_type = reg.type;
101     insn->bits2.da1.src0_abs = reg.abs;
102     insn->bits2.da1.src0_negate = reg.negate;
103     insn->bits2.da1.src0_address_mode = reg.address_mode;
104 
105     if (reg.file == BRW_IMMEDIATE_VALUE) {
106 	insn->bits3.ud = reg.dw1.ud;
107 
108 	/* Required to set some fields in src1 as well:
109 	*/
110 	insn->bits1.da1.src1_reg_file = 0; /* arf */
111 	insn->bits1.da1.src1_reg_type = reg.type;
112     } else {
113 	if (reg.address_mode == BRW_ADDRESS_DIRECT) {
114 	    if (insn->header.access_mode == BRW_ALIGN_1) {
115 		insn->bits2.da1.src0_subreg_nr = reg.subnr;
116 		insn->bits2.da1.src0_reg_nr = reg.nr;
117 	    } else {
118 		insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
119 		insn->bits2.da16.src0_reg_nr = reg.nr;
120 	    }
121 	} else {
122 	    insn->bits2.ia1.src0_subreg_nr = reg.subnr;
123 
124 	    if (insn->header.access_mode == BRW_ALIGN_1) {
125 		insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
126 	    } else {
127 		insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
128 	    }
129 	}
130 
131 	if (insn->header.access_mode == BRW_ALIGN_1) {
132 	    if (reg.width == BRW_WIDTH_1 &&
133 		insn->header.execution_size == BRW_EXECUTE_1) {
134 		insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
135 		insn->bits2.da1.src0_width = BRW_WIDTH_1;
136 		insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
137 	    } else {
138 		insn->bits2.da1.src0_horiz_stride = reg.hstride;
139 		insn->bits2.da1.src0_width = reg.width;
140 		insn->bits2.da1.src0_vert_stride = reg.vstride;
141 	    }
142 	} else {
143 	    insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
144 	    insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
145 	    insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
146 	    insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
147 
148 	    /* This is an oddity of the fact we're using the same
149 	     * descriptions for registers in align_16 as align_1:
150 	     */
151 	    if (reg.vstride == BRW_VERTICAL_STRIDE_8)
152 		insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
153 	    else
154 		insn->bits2.da16.src0_vert_stride = reg.vstride;
155 	}
156     }
157 }
158 
159 
brw_set_src1(struct brw_instruction * insn,struct brw_reg reg)160 void brw_set_src1( struct brw_instruction *insn,
161 		   struct brw_reg reg )
162 {
163     assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
164 
165     insn->bits1.da1.src1_reg_file = reg.file;
166     insn->bits1.da1.src1_reg_type = reg.type;
167     insn->bits3.da1.src1_abs = reg.abs;
168     insn->bits3.da1.src1_negate = reg.negate;
169 
170     /* Only src1 can be immediate in two-argument instructions.
171     */
172     assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
173 
174     if (reg.file == BRW_IMMEDIATE_VALUE) {
175 	insn->bits3.ud = reg.dw1.ud;
176     }
177     else {
178 	/* This is a hardware restriction, which may or may not be lifted
179 	 * in the future:
180 	 */
181 	assert (reg.address_mode == BRW_ADDRESS_DIRECT);
182 	//assert (reg.file == BRW_GENERAL_REGISTER_FILE);
183 
184 	if (insn->header.access_mode == BRW_ALIGN_1) {
185 	    insn->bits3.da1.src1_subreg_nr = reg.subnr;
186 	    insn->bits3.da1.src1_reg_nr = reg.nr;
187 	}
188 	else {
189 	    insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
190 	    insn->bits3.da16.src1_reg_nr = reg.nr;
191 	}
192 
193 	if (insn->header.access_mode == BRW_ALIGN_1) {
194 	    if (reg.width == BRW_WIDTH_1 &&
195 		insn->header.execution_size == BRW_EXECUTE_1) {
196 		insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
197 		insn->bits3.da1.src1_width = BRW_WIDTH_1;
198 		insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
199 	    }
200 	    else {
201 		insn->bits3.da1.src1_horiz_stride = reg.hstride;
202 		insn->bits3.da1.src1_width = reg.width;
203 		insn->bits3.da1.src1_vert_stride = reg.vstride;
204 	    }
205 	}
206 	else {
207 	    insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
208 	    insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
209 	    insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
210 	    insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
211 
212 	    /* This is an oddity of the fact we're using the same
213 	     * descriptions for registers in align_16 as align_1:
214 	     */
215 	    if (reg.vstride == BRW_VERTICAL_STRIDE_8)
216 		insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
217 	    else
218 		insn->bits3.da16.src1_vert_stride = reg.vstride;
219 	}
220     }
221 }
222 
223 
224 
brw_set_math_message(struct brw_instruction * insn,uint32_t msg_length,uint32_t response_length,uint32_t function,uint32_t integer_type,int low_precision,int saturate,uint32_t dataType)225 static void brw_set_math_message( struct brw_instruction *insn,
226 				  uint32_t msg_length,
227 				  uint32_t response_length,
228 				  uint32_t function,
229 				  uint32_t integer_type,
230 				  int low_precision,
231 				  int saturate,
232 				  uint32_t dataType )
233 {
234     brw_set_src1 (insn, brw_imm_d (0));
235 
236     insn->bits3.math.function = function;
237     insn->bits3.math.int_type = integer_type;
238     insn->bits3.math.precision = low_precision;
239     insn->bits3.math.saturate = saturate;
240     insn->bits3.math.data_type = dataType;
241     insn->bits3.math.response_length = response_length;
242     insn->bits3.math.msg_length = msg_length;
243     insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
244     insn->bits3.math.end_of_thread = 0;
245 }
246 
brw_set_urb_message(struct brw_instruction * insn,int allocate,int used,uint32_t msg_length,uint32_t response_length,int end_of_thread,int complete,uint32_t offset,uint32_t swizzle_control)247 static void brw_set_urb_message( struct brw_instruction *insn,
248 				 int allocate,
249 				 int used,
250 				 uint32_t msg_length,
251 				 uint32_t response_length,
252 				 int end_of_thread,
253 				 int complete,
254 				 uint32_t offset,
255 				 uint32_t swizzle_control )
256 {
257     brw_set_src1 (insn, brw_imm_d (0));
258 
259     insn->bits3.urb.opcode = 0;	/* ? */
260     insn->bits3.urb.offset = offset;
261     insn->bits3.urb.swizzle_control = swizzle_control;
262     insn->bits3.urb.allocate = allocate;
263     insn->bits3.urb.used = used;	/* ? */
264     insn->bits3.urb.complete = complete;
265     insn->bits3.urb.response_length = response_length;
266     insn->bits3.urb.msg_length = msg_length;
267     insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
268     insn->bits3.urb.end_of_thread = end_of_thread;
269 }
270 
271 void
brw_instruction_set_dp_write_message(struct brw_instruction * insn,uint32_t binding_table_index,uint32_t msg_control,uint32_t msg_type,uint32_t msg_length,uint32_t pixel_scoreboard_clear,uint32_t response_length,uint32_t end_of_thread)272 brw_instruction_set_dp_write_message (struct brw_instruction *insn,
273 				      uint32_t binding_table_index,
274 				      uint32_t msg_control,
275 				      uint32_t msg_type,
276 				      uint32_t msg_length,
277 				      uint32_t pixel_scoreboard_clear,
278 				      uint32_t response_length,
279 				      uint32_t end_of_thread)
280 {
281     brw_set_src1 (insn, brw_imm_d (0));
282 
283     insn->bits3.dp_write.binding_table_index = binding_table_index;
284     insn->bits3.dp_write.msg_control = msg_control;
285     insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
286     insn->bits3.dp_write.msg_type = msg_type;
287     insn->bits3.dp_write.send_commit_msg = 0;
288     insn->bits3.dp_write.response_length = response_length;
289     insn->bits3.dp_write.msg_length = msg_length;
290     insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
291     insn->bits3.urb.end_of_thread = end_of_thread;
292 }
293 
brw_set_dp_read_message(struct brw_instruction * insn,uint32_t binding_table_index,uint32_t msg_control,uint32_t msg_type,uint32_t target_cache,uint32_t msg_length,uint32_t response_length,uint32_t end_of_thread)294 static void brw_set_dp_read_message( struct brw_instruction *insn,
295 				     uint32_t binding_table_index,
296 				     uint32_t msg_control,
297 				     uint32_t msg_type,
298 				     uint32_t target_cache,
299 				     uint32_t msg_length,
300 				     uint32_t response_length,
301 				     uint32_t end_of_thread )
302 {
303     brw_set_src1 (insn, brw_imm_d (0));
304 
305     insn->bits3.dp_read.binding_table_index = binding_table_index;
306     insn->bits3.dp_read.msg_control = msg_control;
307     insn->bits3.dp_read.msg_type = msg_type;
308     insn->bits3.dp_read.target_cache = target_cache;
309     insn->bits3.dp_read.response_length = response_length;
310     insn->bits3.dp_read.msg_length = msg_length;
311     insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
312     insn->bits3.dp_read.end_of_thread = end_of_thread;
313 }
314 
315 static void
brw_set_sampler_message(struct brw_instruction * insn,cairo_bool_t is_g4x,uint32_t binding_table_index,uint32_t sampler,uint32_t msg_type,uint32_t response_length,uint32_t msg_length,cairo_bool_t eot)316 brw_set_sampler_message (struct brw_instruction *insn,
317 			 cairo_bool_t is_g4x,
318 			 uint32_t binding_table_index,
319 			 uint32_t sampler,
320 			 uint32_t msg_type,
321 			 uint32_t response_length,
322 			 uint32_t msg_length,
323 			 cairo_bool_t eot)
324 {
325     brw_set_src1 (insn, brw_imm_d (0));
326 
327     if (is_g4x) {
328 	/* XXX presume the driver is sane! */
329 	insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
330 	insn->bits3.sampler_g4x.sampler = sampler;
331 	insn->bits3.sampler_g4x.msg_type = msg_type;
332 	insn->bits3.sampler_g4x.response_length = response_length;
333 	insn->bits3.sampler_g4x.msg_length = msg_length;
334 	insn->bits3.sampler_g4x.end_of_thread = eot;
335 	insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
336     } else {
337 	insn->bits3.sampler.binding_table_index = binding_table_index;
338 	insn->bits3.sampler.sampler = sampler;
339 	insn->bits3.sampler.msg_type = msg_type;
340 	insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
341 	insn->bits3.sampler.response_length = response_length;
342 	insn->bits3.sampler.msg_length = msg_length;
343 	insn->bits3.sampler.end_of_thread = eot;
344 	insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
345     }
346 }
347 
348 struct brw_instruction *
brw_next_instruction(struct brw_compile * p,uint32_t opcode)349 brw_next_instruction (struct brw_compile *p,
350 		      uint32_t opcode)
351 {
352     struct brw_instruction *insn;
353 
354     assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
355 
356     insn = &p->store[p->nr_insn++];
357     memcpy(insn, p->current, sizeof(*insn));
358 
359     /* Reset this one-shot flag: */
360     if (p->current->header.destreg__conditonalmod) {
361 	p->current->header.destreg__conditonalmod = 0;
362 	p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
363     }
364 
365     insn->header.opcode = opcode;
366     return insn;
367 }
368 
/*
 * Emit a one-source instruction: allocate the next instruction with
 * 'opcode', then encode 'dest' and 'src' (in that order).  Returns the
 * new instruction.
 */
static struct brw_instruction *
brw_alu1 (struct brw_compile *p,
          uint32_t opcode,
          struct brw_reg dest,
          struct brw_reg src)
{
    struct brw_instruction *inst;

    inst = brw_next_instruction (p, opcode);

    /* The destination must be set first: it fixes the execution size,
     * which the source encoding reads.
     */
    brw_instruction_set_destination (inst, dest);
    brw_instruction_set_source0 (inst, src);

    return inst;
}
379 
/*
 * Emit a two-source instruction: allocate the next instruction with
 * 'opcode', then encode 'dest', 'src0' and 'src1' (in that order).
 * Returns the new instruction.
 */
static struct brw_instruction *
brw_alu2 (struct brw_compile *p,
          uint32_t opcode,
          struct brw_reg dest,
          struct brw_reg src0,
          struct brw_reg src1)
{
    struct brw_instruction *inst;

    inst = brw_next_instruction (p, opcode);

    /* The destination must be set first: it fixes the execution size,
     * which the source encodings read.
     */
    brw_instruction_set_destination (inst, dest);
    brw_instruction_set_source0 (inst, src0);
    brw_set_src1 (inst, src1);

    return inst;
}
392 
393 
394 /***********************************************************************
395  * Convenience routines.
396  */
397 #define ALU1(OP)					\
398     struct brw_instruction *brw_##OP(struct brw_compile *p,			\
399 				     struct brw_reg dest,			\
400 				     struct brw_reg src0)			\
401 {							\
402     return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);	\
403 }
404 
405 #define ALU2(OP)					\
406     struct brw_instruction *brw_##OP(struct brw_compile *p,			\
407 				     struct brw_reg dest,			\
408 				     struct brw_reg src0,			\
409 				     struct brw_reg src1)			\
410 {							\
411     return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
412 }
413 
414 
415     ALU1(MOV)
ALU2(SEL)416     ALU2(SEL)
417     ALU1(NOT)
418     ALU2(AND)
419     ALU2(OR)
420     ALU2(XOR)
421     ALU2(SHR)
422     ALU2(SHL)
423     ALU2(RSR)
424     ALU2(RSL)
425     ALU2(ASR)
426     ALU2(ADD)
427     ALU2(MUL)
428     ALU1(FRC)
429     ALU1(RNDD)
430     ALU1(RNDZ)
431     ALU2(MAC)
432     ALU2(MACH)
433     ALU1(LZD)
434     ALU2(DP4)
435     ALU2(DPH)
436     ALU2(DP3)
437     ALU2(DP2)
438 ALU2(LINE)
439 
440 
441 
442 
443 void brw_NOP(struct brw_compile *p)
444 {
445     struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_NOP);
446     brw_instruction_set_destination(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
447     brw_instruction_set_source0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
448     brw_set_src1(insn, brw_imm_ud(0x0));
449 }
450 
451 
452 
453 
454 
455 /***********************************************************************
456  * Comparisons, if/else/endif
457  */
458 
brw_JMPI(struct brw_compile * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)459 struct brw_instruction *brw_JMPI(struct brw_compile *p,
460 				 struct brw_reg dest,
461 				 struct brw_reg src0,
462 				 struct brw_reg src1)
463 {
464     struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
465 
466     p->current->header.predicate_control = BRW_PREDICATE_NONE;
467 
468     return insn;
469 }
470 
471 /* EU takes the value from the flag register and pushes it onto some
472  * sort of a stack (presumably merging with any flag value already on
473  * the stack).  Within an if block, the flags at the top of the stack
474  * control execution on each channel of the unit, eg. on each of the
475  * 16 pixel values in our wm programs.
476  *
477  * When the matching 'else' instruction is reached (presumably by
478  * countdown of the instruction count patched in by our ELSE/ENDIF
479  * functions), the relevant flags are inverted.
480  *
481  * When the matching 'endif' instruction is reached, the flags are
482  * popped off.  If the stack is now empty, normal execution resumes.
483  *
484  * No attempt is made to deal with stack overflow (14 elements?).
485  */
brw_IF(struct brw_compile * p,uint32_t execute_size)486 struct brw_instruction *brw_IF(struct brw_compile *p, uint32_t execute_size)
487 {
488     struct brw_instruction *insn;
489 
490     if (p->single_program_flow) {
491 	assert(execute_size == BRW_EXECUTE_1);
492 
493 	insn = brw_next_instruction(p, BRW_OPCODE_ADD);
494 	insn->header.predicate_inverse = 1;
495     } else {
496 	insn = brw_next_instruction(p, BRW_OPCODE_IF);
497     }
498 
499     /* Override the defaults for this instruction:
500     */
501     brw_instruction_set_destination (insn, brw_ip_reg ());
502     brw_instruction_set_source0 (insn, brw_ip_reg ());
503     brw_set_src1 (insn, brw_imm_d (0));
504 
505     insn->header.execution_size = execute_size;
506     insn->header.compression_control = BRW_COMPRESSION_NONE;
507     insn->header.predicate_control = BRW_PREDICATE_NORMAL;
508     insn->header.mask_control = BRW_MASK_ENABLE;
509     if (!p->single_program_flow)
510 	insn->header.thread_control = BRW_THREAD_SWITCH;
511 
512     p->current->header.predicate_control = BRW_PREDICATE_NONE;
513 
514     return insn;
515 }
516 
517 
brw_ELSE(struct brw_compile * p,struct brw_instruction * if_insn)518 struct brw_instruction *brw_ELSE(struct brw_compile *p,
519 				 struct brw_instruction *if_insn)
520 {
521     struct brw_instruction *insn;
522 
523     if (p->single_program_flow) {
524 	insn = brw_next_instruction(p, BRW_OPCODE_ADD);
525     } else {
526 	insn = brw_next_instruction(p, BRW_OPCODE_ELSE);
527     }
528 
529     brw_instruction_set_destination (insn, brw_ip_reg ());
530     brw_instruction_set_source0 (insn, brw_ip_reg ());
531     brw_set_src1 (insn, brw_imm_d (0));
532 
533     insn->header.compression_control = BRW_COMPRESSION_NONE;
534     insn->header.execution_size = if_insn->header.execution_size;
535     insn->header.mask_control = BRW_MASK_ENABLE;
536     if (!p->single_program_flow)
537 	insn->header.thread_control = BRW_THREAD_SWITCH;
538 
539     /* Patch the if instruction to point at this instruction.
540     */
541     if (p->single_program_flow) {
542 	assert(if_insn->header.opcode == BRW_OPCODE_ADD);
543 
544 	if_insn->bits3.ud = (insn - if_insn + 1) * 16;
545     } else {
546 	assert(if_insn->header.opcode == BRW_OPCODE_IF);
547 
548 	if_insn->bits3.if_else.jump_count = insn - if_insn;
549 	if_insn->bits3.if_else.pop_count = 1;
550 	if_insn->bits3.if_else.pad0 = 0;
551     }
552 
553     return insn;
554 }
555 
brw_ENDIF(struct brw_compile * p,struct brw_instruction * patch_insn)556 void brw_ENDIF(struct brw_compile *p,
557 	       struct brw_instruction *patch_insn)
558 {
559     if (p->single_program_flow) {
560 	/* In single program flow mode, there's no need to execute an ENDIF,
561 	 * since we don't need to do any stack operations, and if we're executing
562 	 * currently, we want to just continue executing.
563 	 */
564 	struct brw_instruction *next = &p->store[p->nr_insn];
565 
566 	assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
567 
568 	patch_insn->bits3.ud = (next - patch_insn) * 16;
569     } else {
570 	struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_ENDIF);
571 
572 	brw_instruction_set_destination(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
573 	brw_instruction_set_source0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
574 	brw_set_src1 (insn, brw_imm_d (0));
575 
576 	insn->header.compression_control = BRW_COMPRESSION_NONE;
577 	insn->header.execution_size = patch_insn->header.execution_size;
578 	insn->header.mask_control = BRW_MASK_ENABLE;
579 	insn->header.thread_control = BRW_THREAD_SWITCH;
580 
581 	assert(patch_insn->bits3.if_else.jump_count == 0);
582 
583 	/* Patch the if or else instructions to point at this or the next
584 	 * instruction respectively.
585 	 */
586 	if (patch_insn->header.opcode == BRW_OPCODE_IF) {
587 	    /* Automagically turn it into an IFF:
588 	    */
589 	    patch_insn->header.opcode = BRW_OPCODE_IFF;
590 	    patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
591 	    patch_insn->bits3.if_else.pop_count = 0;
592 	    patch_insn->bits3.if_else.pad0 = 0;
593 	} else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
594 	    patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
595 	    patch_insn->bits3.if_else.pop_count = 1;
596 	    patch_insn->bits3.if_else.pad0 = 0;
597 	} else {
598 	    assert(0);
599 	}
600 
601 	/* Also pop item off the stack in the endif instruction:
602 	*/
603 	insn->bits3.if_else.jump_count = 0;
604 	insn->bits3.if_else.pop_count = 1;
605 	insn->bits3.if_else.pad0 = 0;
606     }
607 }
608 
brw_BREAK(struct brw_compile * p)609 struct brw_instruction *brw_BREAK(struct brw_compile *p)
610 {
611     struct brw_instruction *insn;
612     insn = brw_next_instruction(p, BRW_OPCODE_BREAK);
613     brw_instruction_set_destination(insn, brw_ip_reg());
614     brw_instruction_set_source0(insn, brw_ip_reg());
615     brw_set_src1(insn, brw_imm_d (0));
616     insn->header.compression_control = BRW_COMPRESSION_NONE;
617     insn->header.execution_size = BRW_EXECUTE_8;
618     /* insn->header.mask_control = BRW_MASK_DISABLE; */
619     insn->bits3.if_else.pad0 = 0;
620     return insn;
621 }
622 
brw_CONT(struct brw_compile * p)623 struct brw_instruction *brw_CONT(struct brw_compile *p)
624 {
625     struct brw_instruction *insn;
626     insn = brw_next_instruction(p, BRW_OPCODE_CONTINUE);
627     brw_instruction_set_destination(insn, brw_ip_reg());
628     brw_instruction_set_source0(insn, brw_ip_reg());
629     brw_set_src1 (insn, brw_imm_d (0));
630     insn->header.compression_control = BRW_COMPRESSION_NONE;
631     insn->header.execution_size = BRW_EXECUTE_8;
632     /* insn->header.mask_control = BRW_MASK_DISABLE; */
633     insn->bits3.if_else.pad0 = 0;
634     return insn;
635 }
636 
637 /* DO/WHILE loop:
638 */
brw_DO(struct brw_compile * p,uint32_t execute_size)639 struct brw_instruction *brw_DO(struct brw_compile *p, uint32_t execute_size)
640 {
641     if (p->single_program_flow) {
642 	return &p->store[p->nr_insn];
643     } else {
644 	struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_DO);
645 
646 	/* Override the defaults for this instruction:
647 	*/
648 	brw_instruction_set_destination(insn, brw_null_reg());
649 	brw_instruction_set_source0(insn, brw_null_reg());
650 	brw_set_src1(insn, brw_null_reg());
651 
652 	insn->header.compression_control = BRW_COMPRESSION_NONE;
653 	insn->header.execution_size = execute_size;
654 	insn->header.predicate_control = BRW_PREDICATE_NONE;
655 	/* insn->header.mask_control = BRW_MASK_ENABLE; */
656 	/* insn->header.mask_control = BRW_MASK_DISABLE; */
657 
658 	return insn;
659     }
660 }
661 
662 
663 
brw_WHILE(struct brw_compile * p,struct brw_instruction * do_insn)664 struct brw_instruction *brw_WHILE(struct brw_compile *p,
665 				  struct brw_instruction *do_insn)
666 {
667     struct brw_instruction *insn;
668 
669     if (p->single_program_flow)
670 	insn = brw_next_instruction(p, BRW_OPCODE_ADD);
671     else
672 	insn = brw_next_instruction(p, BRW_OPCODE_WHILE);
673 
674     brw_instruction_set_destination(insn, brw_ip_reg());
675     brw_instruction_set_source0(insn, brw_ip_reg());
676     brw_set_src1 (insn, brw_imm_d (0));
677 
678     insn->header.compression_control = BRW_COMPRESSION_NONE;
679 
680     if (p->single_program_flow) {
681 	insn->header.execution_size = BRW_EXECUTE_1;
682 
683 	insn->bits3.d = (do_insn - insn) * 16;
684     } else {
685 	insn->header.execution_size = do_insn->header.execution_size;
686 
687 	assert(do_insn->header.opcode == BRW_OPCODE_DO);
688 	insn->bits3.if_else.jump_count = do_insn - insn + 1;
689 	insn->bits3.if_else.pop_count = 0;
690 	insn->bits3.if_else.pad0 = 0;
691     }
692 
693     /*    insn->header.mask_control = BRW_MASK_ENABLE; */
694 
695     /* insn->header.mask_control = BRW_MASK_DISABLE; */
696     p->current->header.predicate_control = BRW_PREDICATE_NONE;
697     return insn;
698 }
699 
700 
701 /* FORWARD JUMPS:
702 */
brw_land_fwd_jump(struct brw_compile * p,struct brw_instruction * jmp_insn)703 void brw_land_fwd_jump(struct brw_compile *p,
704 		       struct brw_instruction *jmp_insn)
705 {
706     struct brw_instruction *landing = &p->store[p->nr_insn];
707 
708     assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
709     assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
710 
711     jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
712 }
713 
714 
715 
716 /* To integrate with the above, it makes sense that the comparison
717  * instruction should populate the flag register.  It might be simpler
718  * just to use the flag reg for most WM tasks?
719  */
brw_CMP(struct brw_compile * p,struct brw_reg dest,uint32_t conditional,struct brw_reg src0,struct brw_reg src1)720 void brw_CMP(struct brw_compile *p,
721 	     struct brw_reg dest,
722 	     uint32_t conditional,
723 	     struct brw_reg src0,
724 	     struct brw_reg src1)
725 {
726     struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_CMP);
727 
728     insn->header.destreg__conditonalmod = conditional;
729     brw_instruction_set_destination(insn, dest);
730     brw_instruction_set_source0(insn, src0);
731     brw_set_src1(insn, src1);
732 
733     /*    guess_execution_size(insn, src0); */
734 
735 
736     /* Make it so that future instructions will use the computed flag
737      * value until brw_set_predicate_control_flag_value() is called
738      * again.
739      */
740     if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
741 	dest.nr == 0) {
742 	p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
743 	p->flag_value = 0xff;
744     }
745 }
746 
747 
748 
749 /***********************************************************************
750  * Helpers for the various SEND message types:
751  */
752 
753 /* Invert 8 values
754 */
brw_math(struct brw_compile * p,struct brw_reg dest,uint32_t function,uint32_t saturate,uint32_t msg_reg_nr,struct brw_reg src,uint32_t data_type,uint32_t precision)755 void brw_math( struct brw_compile *p,
756 	       struct brw_reg dest,
757 	       uint32_t function,
758 	       uint32_t saturate,
759 	       uint32_t msg_reg_nr,
760 	       struct brw_reg src,
761 	       uint32_t data_type,
762 	       uint32_t precision )
763 {
764     struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
765     uint32_t msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
766     uint32_t response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
767 
768     /* Example code doesn't set predicate_control for send
769      * instructions.
770      */
771     insn->header.predicate_control = 0;
772     insn->header.destreg__conditonalmod = msg_reg_nr;
773 
774     response_length = 1;
775 
776     brw_instruction_set_destination(insn, dest);
777     brw_instruction_set_source0(insn, src);
778     brw_set_math_message(insn,
779 			 msg_length, response_length,
780 			 function,
781 			 BRW_MATH_INTEGER_UNSIGNED,
782 			 precision,
783 			 saturate,
784 			 data_type);
785 }
786 
787 /* Use 2 send instructions to invert 16 elements
788 */
brw_math_16(struct brw_compile * p,struct brw_reg dest,uint32_t function,uint32_t saturate,uint32_t msg_reg_nr,struct brw_reg src,uint32_t precision)789 void brw_math_16( struct brw_compile *p,
790 		  struct brw_reg dest,
791 		  uint32_t function,
792 		  uint32_t saturate,
793 		  uint32_t msg_reg_nr,
794 		  struct brw_reg src,
795 		  uint32_t precision )
796 {
797     struct brw_instruction *insn;
798     uint32_t msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
799     uint32_t response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
800 
801     /* First instruction:
802     */
803     brw_push_insn_state(p);
804     brw_set_predicate_control_flag_value(p, 0xff);
805     brw_set_compression_control(p, BRW_COMPRESSION_NONE);
806 
807     insn = brw_next_instruction(p, BRW_OPCODE_SEND);
808     insn->header.destreg__conditonalmod = msg_reg_nr;
809 
810     brw_instruction_set_destination(insn, dest);
811     brw_instruction_set_source0(insn, src);
812     brw_set_math_message(insn,
813 			 msg_length, response_length,
814 			 function,
815 			 BRW_MATH_INTEGER_UNSIGNED,
816 			 precision,
817 			 saturate,
818 			 BRW_MATH_DATA_VECTOR);
819 
820     /* Second instruction:
821     */
822     insn = brw_next_instruction(p, BRW_OPCODE_SEND);
823     insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
824     insn->header.destreg__conditonalmod = msg_reg_nr+1;
825 
826     brw_instruction_set_destination(insn, offset(dest,1));
827     brw_instruction_set_source0(insn, src);
828     brw_set_math_message(insn,
829 			 msg_length, response_length,
830 			 function,
831 			 BRW_MATH_INTEGER_UNSIGNED,
832 			 precision,
833 			 saturate,
834 			 BRW_MATH_DATA_VECTOR);
835 
836     brw_pop_insn_state(p);
837 }
838 
839 
840 
841 
brw_dp_WRITE_16(struct brw_compile * p,struct brw_reg src,uint32_t msg_reg_nr,uint32_t scratch_offset)842 void brw_dp_WRITE_16( struct brw_compile *p,
843 		      struct brw_reg src,
844 		      uint32_t msg_reg_nr,
845 		      uint32_t scratch_offset )
846 {
847     {
848 	brw_push_insn_state(p);
849 	brw_set_mask_control(p, BRW_MASK_DISABLE);
850 	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
851 
852 	brw_MOV (p,
853 		retype (brw_vec1_grf (0, 2), BRW_REGISTER_TYPE_D),
854 		brw_imm_d (scratch_offset));
855 
856 	brw_pop_insn_state(p);
857     }
858 
859     {
860 	uint32_t msg_length = 3;
861 	struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
862 	struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
863 
864 	insn->header.predicate_control = 0; /* XXX */
865 	insn->header.compression_control = BRW_COMPRESSION_NONE;
866 	insn->header.destreg__conditonalmod = msg_reg_nr;
867 
868 	brw_instruction_set_destination(insn, dest);
869 	brw_instruction_set_source0(insn, src);
870 
871 	brw_instruction_set_dp_write_message(insn,
872 					     255, /* bti */
873 					     BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
874 					     BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
875 					     msg_length,
876 					     0, /* pixel scoreboard */
877 					     0, /* response_length */
878 					     0); /* eot */
879     }
880 
881 }
882 
883 
brw_dp_READ_16(struct brw_compile * p,struct brw_reg dest,uint32_t msg_reg_nr,uint32_t scratch_offset)884 void brw_dp_READ_16( struct brw_compile *p,
885 		     struct brw_reg dest,
886 		     uint32_t msg_reg_nr,
887 		     uint32_t scratch_offset )
888 {
889     {
890 	brw_push_insn_state(p);
891 	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
892 	brw_set_mask_control(p, BRW_MASK_DISABLE);
893 
894 	brw_MOV (p,
895 		retype (brw_vec1_grf (0, 2), BRW_REGISTER_TYPE_D),
896 		brw_imm_d (scratch_offset));
897 
898 	brw_pop_insn_state(p);
899     }
900 
901     {
902 	struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
903 
904 	insn->header.predicate_control = 0; /* XXX */
905 	insn->header.compression_control = BRW_COMPRESSION_NONE;
906 	insn->header.destreg__conditonalmod = msg_reg_nr;
907 
908 	brw_instruction_set_destination(insn, dest);	/* UW? */
909 	brw_instruction_set_source0(insn, retype(brw_vec8_grf(0), BRW_REGISTER_TYPE_UW));
910 
911 	brw_set_dp_read_message(insn,
912 				255, /* bti */
913 				3,  /* msg_control */
914 				BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
915 				1, /* target cache */
916 				1, /* msg_length */
917 				2, /* response_length */
918 				0); /* eot */
919     }
920 }
921 
922 
brw_fb_WRITE(struct brw_compile * p,struct brw_reg dest,uint32_t msg_reg_nr,struct brw_reg src0,uint32_t binding_table_index,uint32_t msg_length,uint32_t response_length,int eot)923 void brw_fb_WRITE(struct brw_compile *p,
924 		  struct brw_reg dest,
925 		  uint32_t msg_reg_nr,
926 		  struct brw_reg src0,
927 		  uint32_t binding_table_index,
928 		  uint32_t msg_length,
929 		  uint32_t response_length,
930 		  int eot)
931 {
932     struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
933 
934     insn->header.predicate_control = 0; /* XXX */
935     insn->header.compression_control = BRW_COMPRESSION_NONE;
936     insn->header.destreg__conditonalmod = msg_reg_nr;
937 
938     brw_instruction_set_destination(insn, dest);
939     brw_instruction_set_source0(insn, src0);
940     brw_instruction_set_dp_write_message(insn,
941 					 binding_table_index,
942 					 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
943 					 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
944 					 msg_length,
945 					 1,	/* pixel scoreboard */
946 					 response_length,
947 					 eot);
948 }
949 
950 
951 
/* Emit a sampler SEND, applying a workaround for partial writemasks.
 *
 * Nothing is emitted when writemask is zero.
 *
 * Background (from the original author): the hardware does not
 * properly perform destination dependency checking on send
 * instructions.  In practice this seems to matter only for texture
 * samples whose result registers are written again later without an
 * intervening read.  The writemask already reflects that situation,
 * so it is used to decide whether a workaround is required.
 *
 * When writemask != WRITEMASK_XYZW, the lowest contiguous run of
 * enabled channels (within the low four bits) is located:
 *
 *  - If that run is the whole writemask, a message header is built
 *    in message register msg_reg_nr: GRF 0 is copied into it and the
 *    complement of the mask, shifted left by 12, is stored in its
 *    dword element 2.  The send then uses a null source, a
 *    destination advanced by two registers per skipped leading
 *    channel, and a response length of two registers per enabled
 *    channel.
 *
 *  - Otherwise the send is issued as requested and followed by a
 *    self-MOV of the register at offset(dest, response_length - 1),
 *    which generates the dependency by other means.
 */
void brw_SAMPLE (struct brw_compile *p,
		 struct brw_reg dest,
		 uint32_t msg_reg_nr,
		 struct brw_reg src0,
		 uint32_t binding_table_index,
		 uint32_t sampler,
		 uint32_t writemask,
		 uint32_t msg_type,
		 uint32_t response_length,
		 uint32_t msg_length,
		 cairo_bool_t eot)
{
    struct brw_instruction *send;
    int stall_needed = 0;

    if (writemask == 0)
	return;

    if (writemask != WRITEMASK_XYZW) {
	uint32_t chan = 0;
	uint32_t run_mask = 0;
	uint32_t run_len = 0;
	uint32_t leading_regs;

	/* Skip disabled leading channels; each one accounts for two
	 * destination registers. */
	while (chan < 4 && !(writemask & (1<<chan)))
	    chan++;
	leading_regs = 2 * chan;

	/* Gather the contiguous run of enabled channels that follows. */
	while (chan < 4 && (writemask & (1<<chan))) {
	    run_mask |= 1<<chan;
	    run_len++;
	    chan++;
	}

	if (run_mask == writemask) {
	    struct brw_reg header = brw_message_reg(msg_reg_nr);
	    uint32_t disabled = ~run_mask & WRITEMASK_XYZW;

	    brw_push_insn_state(p);

	    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	    brw_set_mask_control(p, BRW_MASK_DISABLE);

	    brw_MOV(p, header, brw_vec8_grf(0));
	    brw_MOV(p, get_element_ud(header, 2), brw_imm_ud(disabled << 12));

	    brw_pop_insn_state(p);

	    src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	    dest = offset(dest, leading_regs);
	    response_length = run_len * 2;
	}
	else {
	    /* The enabled channels are not one contiguous run (or lie
	     * outside the low four bits): fall back to stalling. */
	    stall_needed = 1;
	}
    }

    send = brw_next_instruction(p, BRW_OPCODE_SEND);

    send->header.predicate_control = 0; /* XXX */
    send->header.compression_control = BRW_COMPRESSION_NONE;
    send->header.destreg__conditonalmod = msg_reg_nr;

    brw_instruction_set_destination(send, dest);
    brw_instruction_set_source0(send, src0);
    brw_set_sampler_message (send, p->is_g4x,
			     binding_table_index,
			     sampler,
			     msg_type,
			     response_length,
			     msg_length,
			     eot);

    if (stall_needed) {
	/* e.g.  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 } */
	struct brw_reg last = vec8(offset(dest, response_length-1));

	brw_push_insn_state(p);
	brw_set_compression_control(p, 0);
	brw_MOV(p, last, last);
	brw_pop_insn_state(p);
    }
}
1052 
1053 /* All these variables are pretty confusing - we might be better off
1054  * using bitmasks and macros for this, in the old style.  Or perhaps
1055  * just having the caller instantiate the fields in dword3 itself.
1056  */
brw_urb_WRITE(struct brw_compile * p,struct brw_reg dest,uint32_t msg_reg_nr,struct brw_reg src0,int allocate,int used,uint32_t msg_length,uint32_t response_length,int eot,int writes_complete,uint32_t offset,uint32_t swizzle)1057 void brw_urb_WRITE(struct brw_compile *p,
1058 		   struct brw_reg dest,
1059 		   uint32_t msg_reg_nr,
1060 		   struct brw_reg src0,
1061 		   int allocate,
1062 		   int used,
1063 		   uint32_t msg_length,
1064 		   uint32_t response_length,
1065 		   int eot,
1066 		   int writes_complete,
1067 		   uint32_t offset,
1068 		   uint32_t swizzle)
1069 {
1070     struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
1071 
1072     assert(msg_length < 16);
1073 
1074     brw_instruction_set_destination (insn, dest);
1075     brw_instruction_set_source0 (insn, src0);
1076     brw_set_src1 (insn, brw_imm_d (0));
1077 
1078     insn->header.destreg__conditonalmod = msg_reg_nr;
1079 
1080     brw_set_urb_message (insn,
1081 			 allocate,
1082 			 used,
1083 			 msg_length,
1084 			 response_length,
1085 			 eot,
1086 			 writes_complete,
1087 			 offset,
1088 			 swizzle);
1089 }
1090