1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32 #include "cairoint.h"
33 #include "cairo-drm-intel-brw-eu.h"
34
35 #include <string.h>
36
37 /***********************************************************************
38 * Internal helper for constructing instructions
39 */
40
guess_execution_size(struct brw_instruction * insn,struct brw_reg reg)41 static void guess_execution_size( struct brw_instruction *insn,
42 struct brw_reg reg )
43 {
44 if (reg.width == BRW_WIDTH_8 &&
45 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
46 insn->header.execution_size = BRW_EXECUTE_16;
47 else
48 insn->header.execution_size = reg.width; /* note - definitions are compatible */
49 }
50
51
52 void
brw_instruction_set_destination(struct brw_instruction * insn,struct brw_reg dest)53 brw_instruction_set_destination (struct brw_instruction *insn,
54 struct brw_reg dest)
55 {
56 insn->bits1.da1.dest_reg_file = dest.file;
57 insn->bits1.da1.dest_reg_type = dest.type;
58 insn->bits1.da1.dest_address_mode = dest.address_mode;
59
60 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
61 insn->bits1.da1.dest_reg_nr = dest.nr;
62
63 if (insn->header.access_mode == BRW_ALIGN_1) {
64 insn->bits1.da1.dest_subreg_nr = dest.subnr;
65 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
66 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
67 insn->bits1.da1.dest_horiz_stride = dest.hstride;
68 } else {
69 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
70 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
71 }
72 } else {
73 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
74
75 /* These are different sizes in align1 vs align16:
76 */
77 if (insn->header.access_mode == BRW_ALIGN_1) {
78 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
79 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
80 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
81 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
82 } else {
83 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
84 }
85 }
86
87 /* NEW: Set the execution size based on dest.width and
88 * insn->compression_control:
89 */
90 guess_execution_size(insn, dest);
91 }
92
93 void
brw_instruction_set_source0(struct brw_instruction * insn,struct brw_reg reg)94 brw_instruction_set_source0 (struct brw_instruction *insn,
95 struct brw_reg reg)
96 {
97 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
98
99 insn->bits1.da1.src0_reg_file = reg.file;
100 insn->bits1.da1.src0_reg_type = reg.type;
101 insn->bits2.da1.src0_abs = reg.abs;
102 insn->bits2.da1.src0_negate = reg.negate;
103 insn->bits2.da1.src0_address_mode = reg.address_mode;
104
105 if (reg.file == BRW_IMMEDIATE_VALUE) {
106 insn->bits3.ud = reg.dw1.ud;
107
108 /* Required to set some fields in src1 as well:
109 */
110 insn->bits1.da1.src1_reg_file = 0; /* arf */
111 insn->bits1.da1.src1_reg_type = reg.type;
112 } else {
113 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
114 if (insn->header.access_mode == BRW_ALIGN_1) {
115 insn->bits2.da1.src0_subreg_nr = reg.subnr;
116 insn->bits2.da1.src0_reg_nr = reg.nr;
117 } else {
118 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
119 insn->bits2.da16.src0_reg_nr = reg.nr;
120 }
121 } else {
122 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
123
124 if (insn->header.access_mode == BRW_ALIGN_1) {
125 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
126 } else {
127 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
128 }
129 }
130
131 if (insn->header.access_mode == BRW_ALIGN_1) {
132 if (reg.width == BRW_WIDTH_1 &&
133 insn->header.execution_size == BRW_EXECUTE_1) {
134 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
135 insn->bits2.da1.src0_width = BRW_WIDTH_1;
136 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
137 } else {
138 insn->bits2.da1.src0_horiz_stride = reg.hstride;
139 insn->bits2.da1.src0_width = reg.width;
140 insn->bits2.da1.src0_vert_stride = reg.vstride;
141 }
142 } else {
143 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
144 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
145 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
146 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
147
148 /* This is an oddity of the fact we're using the same
149 * descriptions for registers in align_16 as align_1:
150 */
151 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
152 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
153 else
154 insn->bits2.da16.src0_vert_stride = reg.vstride;
155 }
156 }
157 }
158
159
brw_set_src1(struct brw_instruction * insn,struct brw_reg reg)160 void brw_set_src1( struct brw_instruction *insn,
161 struct brw_reg reg )
162 {
163 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
164
165 insn->bits1.da1.src1_reg_file = reg.file;
166 insn->bits1.da1.src1_reg_type = reg.type;
167 insn->bits3.da1.src1_abs = reg.abs;
168 insn->bits3.da1.src1_negate = reg.negate;
169
170 /* Only src1 can be immediate in two-argument instructions.
171 */
172 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
173
174 if (reg.file == BRW_IMMEDIATE_VALUE) {
175 insn->bits3.ud = reg.dw1.ud;
176 }
177 else {
178 /* This is a hardware restriction, which may or may not be lifted
179 * in the future:
180 */
181 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
182 //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
183
184 if (insn->header.access_mode == BRW_ALIGN_1) {
185 insn->bits3.da1.src1_subreg_nr = reg.subnr;
186 insn->bits3.da1.src1_reg_nr = reg.nr;
187 }
188 else {
189 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
190 insn->bits3.da16.src1_reg_nr = reg.nr;
191 }
192
193 if (insn->header.access_mode == BRW_ALIGN_1) {
194 if (reg.width == BRW_WIDTH_1 &&
195 insn->header.execution_size == BRW_EXECUTE_1) {
196 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
197 insn->bits3.da1.src1_width = BRW_WIDTH_1;
198 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
199 }
200 else {
201 insn->bits3.da1.src1_horiz_stride = reg.hstride;
202 insn->bits3.da1.src1_width = reg.width;
203 insn->bits3.da1.src1_vert_stride = reg.vstride;
204 }
205 }
206 else {
207 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
208 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
209 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
210 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
211
212 /* This is an oddity of the fact we're using the same
213 * descriptions for registers in align_16 as align_1:
214 */
215 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
216 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
217 else
218 insn->bits3.da16.src1_vert_stride = reg.vstride;
219 }
220 }
221 }
222
223
224
brw_set_math_message(struct brw_instruction * insn,uint32_t msg_length,uint32_t response_length,uint32_t function,uint32_t integer_type,int low_precision,int saturate,uint32_t dataType)225 static void brw_set_math_message( struct brw_instruction *insn,
226 uint32_t msg_length,
227 uint32_t response_length,
228 uint32_t function,
229 uint32_t integer_type,
230 int low_precision,
231 int saturate,
232 uint32_t dataType )
233 {
234 brw_set_src1 (insn, brw_imm_d (0));
235
236 insn->bits3.math.function = function;
237 insn->bits3.math.int_type = integer_type;
238 insn->bits3.math.precision = low_precision;
239 insn->bits3.math.saturate = saturate;
240 insn->bits3.math.data_type = dataType;
241 insn->bits3.math.response_length = response_length;
242 insn->bits3.math.msg_length = msg_length;
243 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
244 insn->bits3.math.end_of_thread = 0;
245 }
246
brw_set_urb_message(struct brw_instruction * insn,int allocate,int used,uint32_t msg_length,uint32_t response_length,int end_of_thread,int complete,uint32_t offset,uint32_t swizzle_control)247 static void brw_set_urb_message( struct brw_instruction *insn,
248 int allocate,
249 int used,
250 uint32_t msg_length,
251 uint32_t response_length,
252 int end_of_thread,
253 int complete,
254 uint32_t offset,
255 uint32_t swizzle_control )
256 {
257 brw_set_src1 (insn, brw_imm_d (0));
258
259 insn->bits3.urb.opcode = 0; /* ? */
260 insn->bits3.urb.offset = offset;
261 insn->bits3.urb.swizzle_control = swizzle_control;
262 insn->bits3.urb.allocate = allocate;
263 insn->bits3.urb.used = used; /* ? */
264 insn->bits3.urb.complete = complete;
265 insn->bits3.urb.response_length = response_length;
266 insn->bits3.urb.msg_length = msg_length;
267 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
268 insn->bits3.urb.end_of_thread = end_of_thread;
269 }
270
271 void
brw_instruction_set_dp_write_message(struct brw_instruction * insn,uint32_t binding_table_index,uint32_t msg_control,uint32_t msg_type,uint32_t msg_length,uint32_t pixel_scoreboard_clear,uint32_t response_length,uint32_t end_of_thread)272 brw_instruction_set_dp_write_message (struct brw_instruction *insn,
273 uint32_t binding_table_index,
274 uint32_t msg_control,
275 uint32_t msg_type,
276 uint32_t msg_length,
277 uint32_t pixel_scoreboard_clear,
278 uint32_t response_length,
279 uint32_t end_of_thread)
280 {
281 brw_set_src1 (insn, brw_imm_d (0));
282
283 insn->bits3.dp_write.binding_table_index = binding_table_index;
284 insn->bits3.dp_write.msg_control = msg_control;
285 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
286 insn->bits3.dp_write.msg_type = msg_type;
287 insn->bits3.dp_write.send_commit_msg = 0;
288 insn->bits3.dp_write.response_length = response_length;
289 insn->bits3.dp_write.msg_length = msg_length;
290 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
291 insn->bits3.urb.end_of_thread = end_of_thread;
292 }
293
brw_set_dp_read_message(struct brw_instruction * insn,uint32_t binding_table_index,uint32_t msg_control,uint32_t msg_type,uint32_t target_cache,uint32_t msg_length,uint32_t response_length,uint32_t end_of_thread)294 static void brw_set_dp_read_message( struct brw_instruction *insn,
295 uint32_t binding_table_index,
296 uint32_t msg_control,
297 uint32_t msg_type,
298 uint32_t target_cache,
299 uint32_t msg_length,
300 uint32_t response_length,
301 uint32_t end_of_thread )
302 {
303 brw_set_src1 (insn, brw_imm_d (0));
304
305 insn->bits3.dp_read.binding_table_index = binding_table_index;
306 insn->bits3.dp_read.msg_control = msg_control;
307 insn->bits3.dp_read.msg_type = msg_type;
308 insn->bits3.dp_read.target_cache = target_cache;
309 insn->bits3.dp_read.response_length = response_length;
310 insn->bits3.dp_read.msg_length = msg_length;
311 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
312 insn->bits3.dp_read.end_of_thread = end_of_thread;
313 }
314
315 static void
brw_set_sampler_message(struct brw_instruction * insn,cairo_bool_t is_g4x,uint32_t binding_table_index,uint32_t sampler,uint32_t msg_type,uint32_t response_length,uint32_t msg_length,cairo_bool_t eot)316 brw_set_sampler_message (struct brw_instruction *insn,
317 cairo_bool_t is_g4x,
318 uint32_t binding_table_index,
319 uint32_t sampler,
320 uint32_t msg_type,
321 uint32_t response_length,
322 uint32_t msg_length,
323 cairo_bool_t eot)
324 {
325 brw_set_src1 (insn, brw_imm_d (0));
326
327 if (is_g4x) {
328 /* XXX presume the driver is sane! */
329 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
330 insn->bits3.sampler_g4x.sampler = sampler;
331 insn->bits3.sampler_g4x.msg_type = msg_type;
332 insn->bits3.sampler_g4x.response_length = response_length;
333 insn->bits3.sampler_g4x.msg_length = msg_length;
334 insn->bits3.sampler_g4x.end_of_thread = eot;
335 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
336 } else {
337 insn->bits3.sampler.binding_table_index = binding_table_index;
338 insn->bits3.sampler.sampler = sampler;
339 insn->bits3.sampler.msg_type = msg_type;
340 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
341 insn->bits3.sampler.response_length = response_length;
342 insn->bits3.sampler.msg_length = msg_length;
343 insn->bits3.sampler.end_of_thread = eot;
344 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
345 }
346 }
347
348 struct brw_instruction *
brw_next_instruction(struct brw_compile * p,uint32_t opcode)349 brw_next_instruction (struct brw_compile *p,
350 uint32_t opcode)
351 {
352 struct brw_instruction *insn;
353
354 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
355
356 insn = &p->store[p->nr_insn++];
357 memcpy(insn, p->current, sizeof(*insn));
358
359 /* Reset this one-shot flag: */
360 if (p->current->header.destreg__conditonalmod) {
361 p->current->header.destreg__conditonalmod = 0;
362 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
363 }
364
365 insn->header.opcode = opcode;
366 return insn;
367 }
368
/*
 * Emit a one-source instruction: allocate it with the given opcode,
 * then encode the destination followed by the single source.
 * Returns the new instruction.
 */
static struct brw_instruction *
brw_alu1 (struct brw_compile *p,
          uint32_t opcode,
          struct brw_reg dest,
          struct brw_reg src)
{
    struct brw_instruction *emitted;

    emitted = brw_next_instruction (p, opcode);
    brw_instruction_set_destination (emitted, dest);
    brw_instruction_set_source0 (emitted, src);

    return emitted;
}
379
/*
 * Emit a two-source instruction: allocate it with the given opcode,
 * then encode the destination, src0 and src1 in that order.
 * Returns the new instruction.
 */
static struct brw_instruction *
brw_alu2 (struct brw_compile *p,
          uint32_t opcode,
          struct brw_reg dest,
          struct brw_reg src0,
          struct brw_reg src1)
{
    struct brw_instruction *emitted;

    emitted = brw_next_instruction (p, opcode);
    brw_instruction_set_destination (emitted, dest);
    brw_instruction_set_source0 (emitted, src0);
    brw_set_src1 (emitted, src1);

    return emitted;
}
392
393
394 /***********************************************************************
395 * Convenience routines.
396 */
397 #define ALU1(OP) \
398 struct brw_instruction *brw_##OP(struct brw_compile *p, \
399 struct brw_reg dest, \
400 struct brw_reg src0) \
401 { \
402 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
403 }
404
405 #define ALU2(OP) \
406 struct brw_instruction *brw_##OP(struct brw_compile *p, \
407 struct brw_reg dest, \
408 struct brw_reg src0, \
409 struct brw_reg src1) \
410 { \
411 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
412 }
413
414
415 ALU1(MOV)
ALU2(SEL)416 ALU2(SEL)
417 ALU1(NOT)
418 ALU2(AND)
419 ALU2(OR)
420 ALU2(XOR)
421 ALU2(SHR)
422 ALU2(SHL)
423 ALU2(RSR)
424 ALU2(RSL)
425 ALU2(ASR)
426 ALU2(ADD)
427 ALU2(MUL)
428 ALU1(FRC)
429 ALU1(RNDD)
430 ALU1(RNDZ)
431 ALU2(MAC)
432 ALU2(MACH)
433 ALU1(LZD)
434 ALU2(DP4)
435 ALU2(DPH)
436 ALU2(DP3)
437 ALU2(DP2)
438 ALU2(LINE)
439
440
441
442
443 void brw_NOP(struct brw_compile *p)
444 {
445 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_NOP);
446 brw_instruction_set_destination(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
447 brw_instruction_set_source0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
448 brw_set_src1(insn, brw_imm_ud(0x0));
449 }
450
451
452
453
454
455 /***********************************************************************
456 * Comparisons, if/else/endif
457 */
458
brw_JMPI(struct brw_compile * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)459 struct brw_instruction *brw_JMPI(struct brw_compile *p,
460 struct brw_reg dest,
461 struct brw_reg src0,
462 struct brw_reg src1)
463 {
464 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
465
466 p->current->header.predicate_control = BRW_PREDICATE_NONE;
467
468 return insn;
469 }
470
471 /* EU takes the value from the flag register and pushes it onto some
472 * sort of a stack (presumably merging with any flag value already on
473 * the stack). Within an if block, the flags at the top of the stack
474 * control execution on each channel of the unit, eg. on each of the
475 * 16 pixel values in our wm programs.
476 *
477 * When the matching 'else' instruction is reached (presumably by
478 * countdown of the instruction count patched in by our ELSE/ENDIF
479 * functions), the relevant flags are inverted.
480 *
481 * When the matching 'endif' instruction is reached, the flags are
482 * popped off. If the stack is now empty, normal execution resumes.
483 *
484 * No attempt is made to deal with stack overflow (14 elements?).
485 */
brw_IF(struct brw_compile * p,uint32_t execute_size)486 struct brw_instruction *brw_IF(struct brw_compile *p, uint32_t execute_size)
487 {
488 struct brw_instruction *insn;
489
490 if (p->single_program_flow) {
491 assert(execute_size == BRW_EXECUTE_1);
492
493 insn = brw_next_instruction(p, BRW_OPCODE_ADD);
494 insn->header.predicate_inverse = 1;
495 } else {
496 insn = brw_next_instruction(p, BRW_OPCODE_IF);
497 }
498
499 /* Override the defaults for this instruction:
500 */
501 brw_instruction_set_destination (insn, brw_ip_reg ());
502 brw_instruction_set_source0 (insn, brw_ip_reg ());
503 brw_set_src1 (insn, brw_imm_d (0));
504
505 insn->header.execution_size = execute_size;
506 insn->header.compression_control = BRW_COMPRESSION_NONE;
507 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
508 insn->header.mask_control = BRW_MASK_ENABLE;
509 if (!p->single_program_flow)
510 insn->header.thread_control = BRW_THREAD_SWITCH;
511
512 p->current->header.predicate_control = BRW_PREDICATE_NONE;
513
514 return insn;
515 }
516
517
brw_ELSE(struct brw_compile * p,struct brw_instruction * if_insn)518 struct brw_instruction *brw_ELSE(struct brw_compile *p,
519 struct brw_instruction *if_insn)
520 {
521 struct brw_instruction *insn;
522
523 if (p->single_program_flow) {
524 insn = brw_next_instruction(p, BRW_OPCODE_ADD);
525 } else {
526 insn = brw_next_instruction(p, BRW_OPCODE_ELSE);
527 }
528
529 brw_instruction_set_destination (insn, brw_ip_reg ());
530 brw_instruction_set_source0 (insn, brw_ip_reg ());
531 brw_set_src1 (insn, brw_imm_d (0));
532
533 insn->header.compression_control = BRW_COMPRESSION_NONE;
534 insn->header.execution_size = if_insn->header.execution_size;
535 insn->header.mask_control = BRW_MASK_ENABLE;
536 if (!p->single_program_flow)
537 insn->header.thread_control = BRW_THREAD_SWITCH;
538
539 /* Patch the if instruction to point at this instruction.
540 */
541 if (p->single_program_flow) {
542 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
543
544 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
545 } else {
546 assert(if_insn->header.opcode == BRW_OPCODE_IF);
547
548 if_insn->bits3.if_else.jump_count = insn - if_insn;
549 if_insn->bits3.if_else.pop_count = 1;
550 if_insn->bits3.if_else.pad0 = 0;
551 }
552
553 return insn;
554 }
555
brw_ENDIF(struct brw_compile * p,struct brw_instruction * patch_insn)556 void brw_ENDIF(struct brw_compile *p,
557 struct brw_instruction *patch_insn)
558 {
559 if (p->single_program_flow) {
560 /* In single program flow mode, there's no need to execute an ENDIF,
561 * since we don't need to do any stack operations, and if we're executing
562 * currently, we want to just continue executing.
563 */
564 struct brw_instruction *next = &p->store[p->nr_insn];
565
566 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
567
568 patch_insn->bits3.ud = (next - patch_insn) * 16;
569 } else {
570 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_ENDIF);
571
572 brw_instruction_set_destination(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
573 brw_instruction_set_source0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
574 brw_set_src1 (insn, brw_imm_d (0));
575
576 insn->header.compression_control = BRW_COMPRESSION_NONE;
577 insn->header.execution_size = patch_insn->header.execution_size;
578 insn->header.mask_control = BRW_MASK_ENABLE;
579 insn->header.thread_control = BRW_THREAD_SWITCH;
580
581 assert(patch_insn->bits3.if_else.jump_count == 0);
582
583 /* Patch the if or else instructions to point at this or the next
584 * instruction respectively.
585 */
586 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
587 /* Automagically turn it into an IFF:
588 */
589 patch_insn->header.opcode = BRW_OPCODE_IFF;
590 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
591 patch_insn->bits3.if_else.pop_count = 0;
592 patch_insn->bits3.if_else.pad0 = 0;
593 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
594 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
595 patch_insn->bits3.if_else.pop_count = 1;
596 patch_insn->bits3.if_else.pad0 = 0;
597 } else {
598 assert(0);
599 }
600
601 /* Also pop item off the stack in the endif instruction:
602 */
603 insn->bits3.if_else.jump_count = 0;
604 insn->bits3.if_else.pop_count = 1;
605 insn->bits3.if_else.pad0 = 0;
606 }
607 }
608
brw_BREAK(struct brw_compile * p)609 struct brw_instruction *brw_BREAK(struct brw_compile *p)
610 {
611 struct brw_instruction *insn;
612 insn = brw_next_instruction(p, BRW_OPCODE_BREAK);
613 brw_instruction_set_destination(insn, brw_ip_reg());
614 brw_instruction_set_source0(insn, brw_ip_reg());
615 brw_set_src1(insn, brw_imm_d (0));
616 insn->header.compression_control = BRW_COMPRESSION_NONE;
617 insn->header.execution_size = BRW_EXECUTE_8;
618 /* insn->header.mask_control = BRW_MASK_DISABLE; */
619 insn->bits3.if_else.pad0 = 0;
620 return insn;
621 }
622
brw_CONT(struct brw_compile * p)623 struct brw_instruction *brw_CONT(struct brw_compile *p)
624 {
625 struct brw_instruction *insn;
626 insn = brw_next_instruction(p, BRW_OPCODE_CONTINUE);
627 brw_instruction_set_destination(insn, brw_ip_reg());
628 brw_instruction_set_source0(insn, brw_ip_reg());
629 brw_set_src1 (insn, brw_imm_d (0));
630 insn->header.compression_control = BRW_COMPRESSION_NONE;
631 insn->header.execution_size = BRW_EXECUTE_8;
632 /* insn->header.mask_control = BRW_MASK_DISABLE; */
633 insn->bits3.if_else.pad0 = 0;
634 return insn;
635 }
636
637 /* DO/WHILE loop:
638 */
brw_DO(struct brw_compile * p,uint32_t execute_size)639 struct brw_instruction *brw_DO(struct brw_compile *p, uint32_t execute_size)
640 {
641 if (p->single_program_flow) {
642 return &p->store[p->nr_insn];
643 } else {
644 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_DO);
645
646 /* Override the defaults for this instruction:
647 */
648 brw_instruction_set_destination(insn, brw_null_reg());
649 brw_instruction_set_source0(insn, brw_null_reg());
650 brw_set_src1(insn, brw_null_reg());
651
652 insn->header.compression_control = BRW_COMPRESSION_NONE;
653 insn->header.execution_size = execute_size;
654 insn->header.predicate_control = BRW_PREDICATE_NONE;
655 /* insn->header.mask_control = BRW_MASK_ENABLE; */
656 /* insn->header.mask_control = BRW_MASK_DISABLE; */
657
658 return insn;
659 }
660 }
661
662
663
brw_WHILE(struct brw_compile * p,struct brw_instruction * do_insn)664 struct brw_instruction *brw_WHILE(struct brw_compile *p,
665 struct brw_instruction *do_insn)
666 {
667 struct brw_instruction *insn;
668
669 if (p->single_program_flow)
670 insn = brw_next_instruction(p, BRW_OPCODE_ADD);
671 else
672 insn = brw_next_instruction(p, BRW_OPCODE_WHILE);
673
674 brw_instruction_set_destination(insn, brw_ip_reg());
675 brw_instruction_set_source0(insn, brw_ip_reg());
676 brw_set_src1 (insn, brw_imm_d (0));
677
678 insn->header.compression_control = BRW_COMPRESSION_NONE;
679
680 if (p->single_program_flow) {
681 insn->header.execution_size = BRW_EXECUTE_1;
682
683 insn->bits3.d = (do_insn - insn) * 16;
684 } else {
685 insn->header.execution_size = do_insn->header.execution_size;
686
687 assert(do_insn->header.opcode == BRW_OPCODE_DO);
688 insn->bits3.if_else.jump_count = do_insn - insn + 1;
689 insn->bits3.if_else.pop_count = 0;
690 insn->bits3.if_else.pad0 = 0;
691 }
692
693 /* insn->header.mask_control = BRW_MASK_ENABLE; */
694
695 /* insn->header.mask_control = BRW_MASK_DISABLE; */
696 p->current->header.predicate_control = BRW_PREDICATE_NONE;
697 return insn;
698 }
699
700
701 /* FORWARD JUMPS:
702 */
brw_land_fwd_jump(struct brw_compile * p,struct brw_instruction * jmp_insn)703 void brw_land_fwd_jump(struct brw_compile *p,
704 struct brw_instruction *jmp_insn)
705 {
706 struct brw_instruction *landing = &p->store[p->nr_insn];
707
708 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
709 assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
710
711 jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
712 }
713
714
715
716 /* To integrate with the above, it makes sense that the comparison
717 * instruction should populate the flag register. It might be simpler
718 * just to use the flag reg for most WM tasks?
719 */
brw_CMP(struct brw_compile * p,struct brw_reg dest,uint32_t conditional,struct brw_reg src0,struct brw_reg src1)720 void brw_CMP(struct brw_compile *p,
721 struct brw_reg dest,
722 uint32_t conditional,
723 struct brw_reg src0,
724 struct brw_reg src1)
725 {
726 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_CMP);
727
728 insn->header.destreg__conditonalmod = conditional;
729 brw_instruction_set_destination(insn, dest);
730 brw_instruction_set_source0(insn, src0);
731 brw_set_src1(insn, src1);
732
733 /* guess_execution_size(insn, src0); */
734
735
736 /* Make it so that future instructions will use the computed flag
737 * value until brw_set_predicate_control_flag_value() is called
738 * again.
739 */
740 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
741 dest.nr == 0) {
742 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
743 p->flag_value = 0xff;
744 }
745 }
746
747
748
749 /***********************************************************************
750 * Helpers for the various SEND message types:
751 */
752
753 /* Invert 8 values
754 */
brw_math(struct brw_compile * p,struct brw_reg dest,uint32_t function,uint32_t saturate,uint32_t msg_reg_nr,struct brw_reg src,uint32_t data_type,uint32_t precision)755 void brw_math( struct brw_compile *p,
756 struct brw_reg dest,
757 uint32_t function,
758 uint32_t saturate,
759 uint32_t msg_reg_nr,
760 struct brw_reg src,
761 uint32_t data_type,
762 uint32_t precision )
763 {
764 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
765 uint32_t msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
766 uint32_t response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
767
768 /* Example code doesn't set predicate_control for send
769 * instructions.
770 */
771 insn->header.predicate_control = 0;
772 insn->header.destreg__conditonalmod = msg_reg_nr;
773
774 response_length = 1;
775
776 brw_instruction_set_destination(insn, dest);
777 brw_instruction_set_source0(insn, src);
778 brw_set_math_message(insn,
779 msg_length, response_length,
780 function,
781 BRW_MATH_INTEGER_UNSIGNED,
782 precision,
783 saturate,
784 data_type);
785 }
786
787 /* Use 2 send instructions to invert 16 elements
788 */
brw_math_16(struct brw_compile * p,struct brw_reg dest,uint32_t function,uint32_t saturate,uint32_t msg_reg_nr,struct brw_reg src,uint32_t precision)789 void brw_math_16( struct brw_compile *p,
790 struct brw_reg dest,
791 uint32_t function,
792 uint32_t saturate,
793 uint32_t msg_reg_nr,
794 struct brw_reg src,
795 uint32_t precision )
796 {
797 struct brw_instruction *insn;
798 uint32_t msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
799 uint32_t response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
800
801 /* First instruction:
802 */
803 brw_push_insn_state(p);
804 brw_set_predicate_control_flag_value(p, 0xff);
805 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
806
807 insn = brw_next_instruction(p, BRW_OPCODE_SEND);
808 insn->header.destreg__conditonalmod = msg_reg_nr;
809
810 brw_instruction_set_destination(insn, dest);
811 brw_instruction_set_source0(insn, src);
812 brw_set_math_message(insn,
813 msg_length, response_length,
814 function,
815 BRW_MATH_INTEGER_UNSIGNED,
816 precision,
817 saturate,
818 BRW_MATH_DATA_VECTOR);
819
820 /* Second instruction:
821 */
822 insn = brw_next_instruction(p, BRW_OPCODE_SEND);
823 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
824 insn->header.destreg__conditonalmod = msg_reg_nr+1;
825
826 brw_instruction_set_destination(insn, offset(dest,1));
827 brw_instruction_set_source0(insn, src);
828 brw_set_math_message(insn,
829 msg_length, response_length,
830 function,
831 BRW_MATH_INTEGER_UNSIGNED,
832 precision,
833 saturate,
834 BRW_MATH_DATA_VECTOR);
835
836 brw_pop_insn_state(p);
837 }
838
839
840
841
brw_dp_WRITE_16(struct brw_compile * p,struct brw_reg src,uint32_t msg_reg_nr,uint32_t scratch_offset)842 void brw_dp_WRITE_16( struct brw_compile *p,
843 struct brw_reg src,
844 uint32_t msg_reg_nr,
845 uint32_t scratch_offset )
846 {
847 {
848 brw_push_insn_state(p);
849 brw_set_mask_control(p, BRW_MASK_DISABLE);
850 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
851
852 brw_MOV (p,
853 retype (brw_vec1_grf (0, 2), BRW_REGISTER_TYPE_D),
854 brw_imm_d (scratch_offset));
855
856 brw_pop_insn_state(p);
857 }
858
859 {
860 uint32_t msg_length = 3;
861 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
862 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
863
864 insn->header.predicate_control = 0; /* XXX */
865 insn->header.compression_control = BRW_COMPRESSION_NONE;
866 insn->header.destreg__conditonalmod = msg_reg_nr;
867
868 brw_instruction_set_destination(insn, dest);
869 brw_instruction_set_source0(insn, src);
870
871 brw_instruction_set_dp_write_message(insn,
872 255, /* bti */
873 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
874 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
875 msg_length,
876 0, /* pixel scoreboard */
877 0, /* response_length */
878 0); /* eot */
879 }
880
881 }
882
883
brw_dp_READ_16(struct brw_compile * p,struct brw_reg dest,uint32_t msg_reg_nr,uint32_t scratch_offset)884 void brw_dp_READ_16( struct brw_compile *p,
885 struct brw_reg dest,
886 uint32_t msg_reg_nr,
887 uint32_t scratch_offset )
888 {
889 {
890 brw_push_insn_state(p);
891 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
892 brw_set_mask_control(p, BRW_MASK_DISABLE);
893
894 brw_MOV (p,
895 retype (brw_vec1_grf (0, 2), BRW_REGISTER_TYPE_D),
896 brw_imm_d (scratch_offset));
897
898 brw_pop_insn_state(p);
899 }
900
901 {
902 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
903
904 insn->header.predicate_control = 0; /* XXX */
905 insn->header.compression_control = BRW_COMPRESSION_NONE;
906 insn->header.destreg__conditonalmod = msg_reg_nr;
907
908 brw_instruction_set_destination(insn, dest); /* UW? */
909 brw_instruction_set_source0(insn, retype(brw_vec8_grf(0), BRW_REGISTER_TYPE_UW));
910
911 brw_set_dp_read_message(insn,
912 255, /* bti */
913 3, /* msg_control */
914 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
915 1, /* target cache */
916 1, /* msg_length */
917 2, /* response_length */
918 0); /* eot */
919 }
920 }
921
922
brw_fb_WRITE(struct brw_compile * p,struct brw_reg dest,uint32_t msg_reg_nr,struct brw_reg src0,uint32_t binding_table_index,uint32_t msg_length,uint32_t response_length,int eot)923 void brw_fb_WRITE(struct brw_compile *p,
924 struct brw_reg dest,
925 uint32_t msg_reg_nr,
926 struct brw_reg src0,
927 uint32_t binding_table_index,
928 uint32_t msg_length,
929 uint32_t response_length,
930 int eot)
931 {
932 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
933
934 insn->header.predicate_control = 0; /* XXX */
935 insn->header.compression_control = BRW_COMPRESSION_NONE;
936 insn->header.destreg__conditonalmod = msg_reg_nr;
937
938 brw_instruction_set_destination(insn, dest);
939 brw_instruction_set_source0(insn, src0);
940 brw_instruction_set_dp_write_message(insn,
941 binding_table_index,
942 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
943 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
944 msg_length,
945 1, /* pixel scoreboard */
946 response_length,
947 eot);
948 }
949
950
951
/*
 * Emit a sampler SEND for the channels selected by writemask.
 *
 * Nothing at all is emitted when writemask is zero.  When writemask is
 * not the full WRITEMASK_XYZW, one of two paths is taken:
 *
 *  - The enabled channels form one contiguous run.  A header is built in
 *    message register msg_reg_nr: brw_vec8_grf (0) is copied into it and
 *    element 2 (via get_element_ud) is overwritten with the complement of
 *    the run shifted left by 12.  src0 is replaced by the null register,
 *    dest is moved on by 2 for every disabled channel preceding the run,
 *    and response_length becomes twice the number of enabled channels.
 *
 *  - Otherwise the SEND is emitted with the caller's arguments and is
 *    followed by a MOV of the last response register onto itself.
 */
void
brw_SAMPLE (struct brw_compile *p,
            struct brw_reg dest,
            uint32_t msg_reg_nr,
            struct brw_reg src0,
            uint32_t binding_table_index,
            uint32_t sampler,
            uint32_t writemask,
            uint32_t msg_type,
            uint32_t response_length,
            uint32_t msg_length,
            cairo_bool_t eot)
{
    struct brw_instruction *send;
    int stall_after_send = 0;

    if (writemask == 0)
        return;

    /* The hardware does not check destination dependencies of send
     * instructions properly, so the dependency has to be produced some
     * other way.  In practice the problem appears to be limited to
     * texture samples whose destination registers are written by the
     * send and then written again without an intervening read.  That
     * situation is already tracked and expressed through the writemask,
     * so a partial writemask is the cue that a workaround may be needed.
     */
    if (writemask != WRITEMASK_XYZW) {
        uint32_t chan = 0;
        uint32_t skipped = 0;   /* destination offset of the first enabled channel */
        uint32_t run_mask = 0;  /* first contiguous run of enabled channels */
        uint32_t run_len = 0;   /* number of channels in that run */

        /* Skip the leading disabled channels. */
        while (chan < 4 && !(writemask & (1 << chan))) {
            skipped += 2;
            chan++;
        }

        /* Gather the run of enabled channels that follows. */
        while (chan < 4 && (writemask & (1 << chan))) {
            run_mask |= 1 << chan;
            run_len++;
            chan++;
        }

        if (run_mask == writemask) {
            /* A single run: narrow the message down to just that run. */
            struct brw_reg header = brw_message_reg (msg_reg_nr);
            uint32_t disabled = ~run_mask & WRITEMASK_XYZW;

            brw_push_insn_state (p);

            brw_set_compression_control (p, BRW_COMPRESSION_NONE);
            brw_set_mask_control (p, BRW_MASK_DISABLE);

            brw_MOV (p, header, brw_vec8_grf (0));
            brw_MOV (p,
                     get_element_ud (header, 2),
                     brw_imm_ud (disabled << 12));

            brw_pop_insn_state (p);

            src0 = retype (brw_null_reg (), BRW_REGISTER_TYPE_UW);
            dest = offset (dest, skipped);
            response_length = run_len * 2;
        } else {
            /* Enabled channels are scattered: fall back to a stall. */
            stall_after_send = 1;
        }
    }

    send = brw_next_instruction (p, BRW_OPCODE_SEND);

    send->header.predicate_control = 0; /* XXX */
    send->header.compression_control = BRW_COMPRESSION_NONE;
    send->header.destreg__conditonalmod = msg_reg_nr;

    brw_instruction_set_destination (send, dest);
    brw_instruction_set_source0 (send, src0);
    brw_set_sampler_message (send, p->is_g4x,
                             binding_table_index,
                             sampler,
                             msg_type,
                             response_length,
                             msg_length,
                             eot);

    if (stall_after_send) {
        struct brw_reg last = vec8 (offset (dest, response_length - 1));

        /* e.g.  mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } */
        brw_push_insn_state (p);
        brw_set_compression_control (p, 0);
        brw_MOV (p, last, last);
        brw_pop_insn_state (p);
    }
}
1052
1053 /* All these variables are pretty confusing - we might be better off
1054 * using bitmasks and macros for this, in the old style. Or perhaps
1055 * just having the caller instantiate the fields in dword3 itself.
1056 */
brw_urb_WRITE(struct brw_compile * p,struct brw_reg dest,uint32_t msg_reg_nr,struct brw_reg src0,int allocate,int used,uint32_t msg_length,uint32_t response_length,int eot,int writes_complete,uint32_t offset,uint32_t swizzle)1057 void brw_urb_WRITE(struct brw_compile *p,
1058 struct brw_reg dest,
1059 uint32_t msg_reg_nr,
1060 struct brw_reg src0,
1061 int allocate,
1062 int used,
1063 uint32_t msg_length,
1064 uint32_t response_length,
1065 int eot,
1066 int writes_complete,
1067 uint32_t offset,
1068 uint32_t swizzle)
1069 {
1070 struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
1071
1072 assert(msg_length < 16);
1073
1074 brw_instruction_set_destination (insn, dest);
1075 brw_instruction_set_source0 (insn, src0);
1076 brw_set_src1 (insn, brw_imm_d (0));
1077
1078 insn->header.destreg__conditonalmod = msg_reg_nr;
1079
1080 brw_set_urb_message (insn,
1081 allocate,
1082 used,
1083 msg_length,
1084 response_length,
1085 eot,
1086 writes_complete,
1087 offset,
1088 swizzle);
1089 }
1090