1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32 #include "brw_eu.h"
33
34 #include <string.h>
35 #include <stdlib.h>
36
/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_SIZE(A) (sizeof(A) / sizeof((A)[0]))
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
guess_execution_size(struct brw_compile * p,struct brw_instruction * insn,struct brw_reg reg)43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width;
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61 void
gen6_resolve_implied_move(struct brw_compile * p,struct brw_reg * src,unsigned msg_reg_nr)62 gen6_resolve_implied_move(struct brw_compile *p,
63 struct brw_reg *src,
64 unsigned msg_reg_nr)
65 {
66 if (p->gen < 060)
67 return;
68
69 if (src->file == BRW_MESSAGE_REGISTER_FILE)
70 return;
71
72 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
73 brw_push_insn_state(p);
74 brw_set_mask_control(p, BRW_MASK_DISABLE);
75 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
76 brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src));
77 brw_pop_insn_state(p);
78 }
79 *src = brw_message_reg(msg_reg_nr);
80 }
81
82 static void
gen7_convert_mrf_to_grf(struct brw_compile * p,struct brw_reg * reg)83 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
84 {
85 /* From the BSpec / ISA Reference / send - [DevIVB+]:
86 * "The send with EOT should use register space R112-R127 for <src>. This is
87 * to enable loading of a new thread into the same slot while the message
88 * with EOT for current thread is pending dispatch."
89 *
90 * Since we're pretending to have 16 MRFs anyway, we may as well use the
91 * registers required for messages with EOT.
92 */
93 if (p->gen >= 070 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
94 reg->file = BRW_GENERAL_REGISTER_FILE;
95 reg->nr += 111;
96 }
97 }
98
99 void
brw_set_dest(struct brw_compile * p,struct brw_instruction * insn,struct brw_reg dest)100 brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
101 struct brw_reg dest)
102 {
103 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
104 dest.file != BRW_MESSAGE_REGISTER_FILE)
105 assert(dest.nr < 128);
106
107 gen7_convert_mrf_to_grf(p, &dest);
108
109 insn->bits1.da1.dest_reg_file = dest.file;
110 insn->bits1.da1.dest_reg_type = dest.type;
111 insn->bits1.da1.dest_address_mode = dest.address_mode;
112
113 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
114 insn->bits1.da1.dest_reg_nr = dest.nr;
115
116 if (insn->header.access_mode == BRW_ALIGN_1) {
117 insn->bits1.da1.dest_subreg_nr = dest.subnr;
118 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
119 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
120 insn->bits1.da1.dest_horiz_stride = dest.hstride;
121 } else {
122 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
123 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
124 /* even ignored in da16, still need to set as '01' */
125 insn->bits1.da16.dest_horiz_stride = 1;
126 }
127 } else {
128 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
129
130 /* These are different sizes in align1 vs align16:
131 */
132 if (insn->header.access_mode == BRW_ALIGN_1) {
133 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
134 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
135 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
136 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
137 }
138 else {
139 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
140 /* even ignored in da16, still need to set as '01' */
141 insn->bits1.ia16.dest_horiz_stride = 1;
142 }
143 }
144
145 guess_execution_size(p, insn, dest);
146 }
147
/*
 * Element size in bytes for each 3-bit register type encoding.
 * Encoding 6 has no entry and therefore reads as 0.
 */
static const int reg_type_size[8] = {
	4, 4,	/* encodings 0, 1 */
	2, 2,	/* encodings 2, 3 */
	1, 1,	/* encodings 4, 5 */
	0,	/* encoding 6: no size recorded */
	4,	/* encoding 7 */
};
157
158 static void
validate_reg(struct brw_instruction * insn,struct brw_reg reg)159 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
160 {
161 int hstride_for_reg[] = {0, 1, 2, 4};
162 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
163 int width_for_reg[] = {1, 2, 4, 8, 16};
164 int execsize_for_reg[] = {1, 2, 4, 8, 16};
165 int width, hstride, vstride, execsize;
166
167 if (reg.file == BRW_IMMEDIATE_VALUE) {
168 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
169 * mean the destination has to be 128-bit aligned and the
170 * destination horiz stride has to be a word.
171 */
172 if (reg.type == BRW_REGISTER_TYPE_V) {
173 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
174 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
175 }
176
177 return;
178 }
179
180 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
181 reg.nr == BRW_ARF_NULL)
182 return;
183
184 assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
185 assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
186 assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
187 assert(insn->header.execution_size >= 0 && insn->header.execution_size < ARRAY_SIZE(execsize_for_reg));
188
189 hstride = hstride_for_reg[reg.hstride];
190
191 if (reg.vstride == 0xf) {
192 vstride = -1;
193 } else {
194 vstride = vstride_for_reg[reg.vstride];
195 }
196
197 width = width_for_reg[reg.width];
198
199 execsize = execsize_for_reg[insn->header.execution_size];
200
201 /* Restrictions from 3.3.10: Register Region Restrictions. */
202 /* 3. */
203 assert(execsize >= width);
204
205 /* 4. */
206 if (execsize == width && hstride != 0) {
207 assert(vstride == -1 || vstride == width * hstride);
208 }
209
210 /* 5. */
211 if (execsize == width && hstride == 0) {
212 /* no restriction on vstride. */
213 }
214
215 /* 6. */
216 if (width == 1) {
217 assert(hstride == 0);
218 }
219
220 /* 7. */
221 if (execsize == 1 && width == 1) {
222 assert(hstride == 0);
223 assert(vstride == 0);
224 }
225
226 /* 8. */
227 if (vstride == 0 && hstride == 0) {
228 assert(width == 1);
229 }
230
231 /* 10. Check destination issues. */
232 }
233
234 void
brw_set_src0(struct brw_compile * p,struct brw_instruction * insn,struct brw_reg reg)235 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
236 struct brw_reg reg)
237 {
238 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
239 assert(reg.nr < 128);
240
241 gen7_convert_mrf_to_grf(p, ®);
242
243 validate_reg(insn, reg);
244
245 insn->bits1.da1.src0_reg_file = reg.file;
246 insn->bits1.da1.src0_reg_type = reg.type;
247 insn->bits2.da1.src0_abs = reg.abs;
248 insn->bits2.da1.src0_negate = reg.negate;
249 insn->bits2.da1.src0_address_mode = reg.address_mode;
250
251 if (reg.file == BRW_IMMEDIATE_VALUE) {
252 insn->bits3.ud = reg.dw1.ud;
253
254 /* Required to set some fields in src1 as well:
255 */
256 insn->bits1.da1.src1_reg_file = 0; /* arf */
257 insn->bits1.da1.src1_reg_type = reg.type;
258 } else {
259 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
260 if (insn->header.access_mode == BRW_ALIGN_1) {
261 insn->bits2.da1.src0_subreg_nr = reg.subnr;
262 insn->bits2.da1.src0_reg_nr = reg.nr;
263 } else {
264 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
265 insn->bits2.da16.src0_reg_nr = reg.nr;
266 }
267 } else {
268 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
269
270 if (insn->header.access_mode == BRW_ALIGN_1) {
271 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
272 } else {
273 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
274 }
275 }
276
277 if (insn->header.access_mode == BRW_ALIGN_1) {
278 if (reg.width == BRW_WIDTH_1 &&
279 insn->header.execution_size == BRW_EXECUTE_1) {
280 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
281 insn->bits2.da1.src0_width = BRW_WIDTH_1;
282 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
283 } else {
284 insn->bits2.da1.src0_horiz_stride = reg.hstride;
285 insn->bits2.da1.src0_width = reg.width;
286 insn->bits2.da1.src0_vert_stride = reg.vstride;
287 }
288 } else {
289 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
290 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
291 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
292 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
293
294 /* This is an oddity of the fact we're using the same
295 * descriptions for registers in align_16 as align_1:
296 */
297 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
298 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
299 else
300 insn->bits2.da16.src0_vert_stride = reg.vstride;
301 }
302 }
303 }
304
brw_set_src1(struct brw_compile * p,struct brw_instruction * insn,struct brw_reg reg)305 void brw_set_src1(struct brw_compile *p,
306 struct brw_instruction *insn,
307 struct brw_reg reg)
308 {
309 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
310 assert(reg.nr < 128);
311
312 gen7_convert_mrf_to_grf(p, ®);
313
314 validate_reg(insn, reg);
315
316 insn->bits1.da1.src1_reg_file = reg.file;
317 insn->bits1.da1.src1_reg_type = reg.type;
318 insn->bits3.da1.src1_abs = reg.abs;
319 insn->bits3.da1.src1_negate = reg.negate;
320
321 /* Only src1 can be immediate in two-argument instructions. */
322 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
323
324 if (reg.file == BRW_IMMEDIATE_VALUE) {
325 insn->bits3.ud = reg.dw1.ud;
326 } else {
327 /* This is a hardware restriction, which may or may not be lifted
328 * in the future:
329 */
330 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
331 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
332
333 if (insn->header.access_mode == BRW_ALIGN_1) {
334 insn->bits3.da1.src1_subreg_nr = reg.subnr;
335 insn->bits3.da1.src1_reg_nr = reg.nr;
336 } else {
337 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
338 insn->bits3.da16.src1_reg_nr = reg.nr;
339 }
340
341 if (insn->header.access_mode == BRW_ALIGN_1) {
342 if (reg.width == BRW_WIDTH_1 &&
343 insn->header.execution_size == BRW_EXECUTE_1) {
344 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
345 insn->bits3.da1.src1_width = BRW_WIDTH_1;
346 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
347 } else {
348 insn->bits3.da1.src1_horiz_stride = reg.hstride;
349 insn->bits3.da1.src1_width = reg.width;
350 insn->bits3.da1.src1_vert_stride = reg.vstride;
351 }
352 } else {
353 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
354 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
355 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
356 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
357
358 /* This is an oddity of the fact we're using the same
359 * descriptions for registers in align_16 as align_1:
360 */
361 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
362 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
363 else
364 insn->bits3.da16.src1_vert_stride = reg.vstride;
365 }
366 }
367 }
368
369 /**
370 * Set the Message Descriptor and Extended Message Descriptor fields
371 * for SEND messages.
372 *
373 * \note This zeroes out the Function Control bits, so it must be called
374 * \b before filling out any message-specific data. Callers can
375 * choose not to fill in irrelevant bits; they will be zero.
376 */
377 static void
brw_set_message_descriptor(struct brw_compile * p,struct brw_instruction * inst,enum brw_message_target sfid,unsigned msg_length,unsigned response_length,bool header_present,bool end_of_thread)378 brw_set_message_descriptor(struct brw_compile *p,
379 struct brw_instruction *inst,
380 enum brw_message_target sfid,
381 unsigned msg_length,
382 unsigned response_length,
383 bool header_present,
384 bool end_of_thread)
385 {
386 brw_set_src1(p, inst, brw_imm_d(0));
387
388 if (p->gen >= 050) {
389 inst->bits3.generic_gen5.header_present = header_present;
390 inst->bits3.generic_gen5.response_length = response_length;
391 inst->bits3.generic_gen5.msg_length = msg_length;
392 inst->bits3.generic_gen5.end_of_thread = end_of_thread;
393
394 if (p->gen >= 060) {
395 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
396 inst->header.destreg__conditionalmod = sfid;
397 } else {
398 /* Set Extended Message Descriptor (ex_desc) */
399 inst->bits2.send_gen5.sfid = sfid;
400 inst->bits2.send_gen5.end_of_thread = end_of_thread;
401 }
402 } else {
403 inst->bits3.generic.response_length = response_length;
404 inst->bits3.generic.msg_length = msg_length;
405 inst->bits3.generic.msg_target = sfid;
406 inst->bits3.generic.end_of_thread = end_of_thread;
407 }
408 }
409
410
brw_set_math_message(struct brw_compile * p,struct brw_instruction * insn,unsigned function,unsigned integer_type,bool low_precision,bool saturate,unsigned dataType)411 static void brw_set_math_message(struct brw_compile *p,
412 struct brw_instruction *insn,
413 unsigned function,
414 unsigned integer_type,
415 bool low_precision,
416 bool saturate,
417 unsigned dataType)
418 {
419 unsigned msg_length;
420 unsigned response_length;
421
422 /* Infer message length from the function */
423 switch (function) {
424 case BRW_MATH_FUNCTION_POW:
425 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
426 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
427 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
428 msg_length = 2;
429 break;
430 default:
431 msg_length = 1;
432 break;
433 }
434
435 /* Infer response length from the function */
436 switch (function) {
437 case BRW_MATH_FUNCTION_SINCOS:
438 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
439 response_length = 2;
440 break;
441 default:
442 response_length = 1;
443 break;
444 }
445
446 brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
447 msg_length, response_length,
448 false, false);
449 if (p->gen == 050) {
450 insn->bits3.math_gen5.function = function;
451 insn->bits3.math_gen5.int_type = integer_type;
452 insn->bits3.math_gen5.precision = low_precision;
453 insn->bits3.math_gen5.saturate = saturate;
454 insn->bits3.math_gen5.data_type = dataType;
455 insn->bits3.math_gen5.snapshot = 0;
456 } else {
457 insn->bits3.math.function = function;
458 insn->bits3.math.int_type = integer_type;
459 insn->bits3.math.precision = low_precision;
460 insn->bits3.math.saturate = saturate;
461 insn->bits3.math.data_type = dataType;
462 }
463 }
464
brw_set_ff_sync_message(struct brw_compile * p,struct brw_instruction * insn,bool allocate,unsigned response_length,bool end_of_thread)465 static void brw_set_ff_sync_message(struct brw_compile *p,
466 struct brw_instruction *insn,
467 bool allocate,
468 unsigned response_length,
469 bool end_of_thread)
470 {
471 brw_set_message_descriptor(p, insn, BRW_SFID_URB,
472 1, response_length,
473 true, end_of_thread);
474 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
475 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
476 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
477 insn->bits3.urb_gen5.allocate = allocate;
478 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
479 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
480 }
481
brw_set_urb_message(struct brw_compile * p,struct brw_instruction * insn,bool allocate,bool used,unsigned msg_length,unsigned response_length,bool end_of_thread,bool complete,unsigned offset,unsigned swizzle_control)482 static void brw_set_urb_message(struct brw_compile *p,
483 struct brw_instruction *insn,
484 bool allocate,
485 bool used,
486 unsigned msg_length,
487 unsigned response_length,
488 bool end_of_thread,
489 bool complete,
490 unsigned offset,
491 unsigned swizzle_control)
492 {
493 brw_set_message_descriptor(p, insn, BRW_SFID_URB,
494 msg_length, response_length, true, end_of_thread);
495 if (p->gen >= 070) {
496 insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
497 insn->bits3.urb_gen7.offset = offset;
498 assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
499 insn->bits3.urb_gen7.swizzle_control = swizzle_control;
500 /* per_slot_offset = 0 makes it ignore offsets in message header */
501 insn->bits3.urb_gen7.per_slot_offset = 0;
502 insn->bits3.urb_gen7.complete = complete;
503 } else if (p->gen >= 050) {
504 insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
505 insn->bits3.urb_gen5.offset = offset;
506 insn->bits3.urb_gen5.swizzle_control = swizzle_control;
507 insn->bits3.urb_gen5.allocate = allocate;
508 insn->bits3.urb_gen5.used = used; /* ? */
509 insn->bits3.urb_gen5.complete = complete;
510 } else {
511 insn->bits3.urb.opcode = 0; /* ? */
512 insn->bits3.urb.offset = offset;
513 insn->bits3.urb.swizzle_control = swizzle_control;
514 insn->bits3.urb.allocate = allocate;
515 insn->bits3.urb.used = used; /* ? */
516 insn->bits3.urb.complete = complete;
517 }
518 }
519
520 void
brw_set_dp_write_message(struct brw_compile * p,struct brw_instruction * insn,unsigned binding_table_index,unsigned msg_control,unsigned msg_type,unsigned msg_length,bool header_present,bool last_render_target,unsigned response_length,bool end_of_thread,bool send_commit_msg)521 brw_set_dp_write_message(struct brw_compile *p,
522 struct brw_instruction *insn,
523 unsigned binding_table_index,
524 unsigned msg_control,
525 unsigned msg_type,
526 unsigned msg_length,
527 bool header_present,
528 bool last_render_target,
529 unsigned response_length,
530 bool end_of_thread,
531 bool send_commit_msg)
532 {
533 unsigned sfid;
534
535 if (p->gen >= 070) {
536 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
537 if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
538 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
539 else
540 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
541 } else if (p->gen >= 060) {
542 /* Use the render cache for all write messages. */
543 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
544 } else {
545 sfid = BRW_SFID_DATAPORT_WRITE;
546 }
547
548 brw_set_message_descriptor(p, insn, sfid,
549 msg_length, response_length,
550 header_present, end_of_thread);
551
552 if (p->gen >= 070) {
553 insn->bits3.gen7_dp.binding_table_index = binding_table_index;
554 insn->bits3.gen7_dp.msg_control = msg_control;
555 insn->bits3.gen7_dp.last_render_target = last_render_target;
556 insn->bits3.gen7_dp.msg_type = msg_type;
557 } else if (p->gen >= 060) {
558 insn->bits3.gen6_dp.binding_table_index = binding_table_index;
559 insn->bits3.gen6_dp.msg_control = msg_control;
560 insn->bits3.gen6_dp.last_render_target = last_render_target;
561 insn->bits3.gen6_dp.msg_type = msg_type;
562 insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
563 } else if (p->gen >= 050) {
564 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
565 insn->bits3.dp_write_gen5.msg_control = msg_control;
566 insn->bits3.dp_write_gen5.last_render_target = last_render_target;
567 insn->bits3.dp_write_gen5.msg_type = msg_type;
568 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
569 } else {
570 insn->bits3.dp_write.binding_table_index = binding_table_index;
571 insn->bits3.dp_write.msg_control = msg_control;
572 insn->bits3.dp_write.last_render_target = last_render_target;
573 insn->bits3.dp_write.msg_type = msg_type;
574 insn->bits3.dp_write.send_commit_msg = send_commit_msg;
575 }
576 }
577
578 void
brw_set_dp_read_message(struct brw_compile * p,struct brw_instruction * insn,unsigned binding_table_index,unsigned msg_control,unsigned msg_type,unsigned target_cache,unsigned msg_length,unsigned response_length)579 brw_set_dp_read_message(struct brw_compile *p,
580 struct brw_instruction *insn,
581 unsigned binding_table_index,
582 unsigned msg_control,
583 unsigned msg_type,
584 unsigned target_cache,
585 unsigned msg_length,
586 unsigned response_length)
587 {
588 unsigned sfid;
589
590 if (p->gen >= 070) {
591 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
592 } else if (p->gen >= 060) {
593 if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
594 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
595 else
596 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
597 } else {
598 sfid = BRW_SFID_DATAPORT_READ;
599 }
600
601 brw_set_message_descriptor(p, insn, sfid,
602 msg_length, response_length,
603 true, false);
604
605 if (p->gen >= 070) {
606 insn->bits3.gen7_dp.binding_table_index = binding_table_index;
607 insn->bits3.gen7_dp.msg_control = msg_control;
608 insn->bits3.gen7_dp.last_render_target = 0;
609 insn->bits3.gen7_dp.msg_type = msg_type;
610 } else if (p->gen >= 060) {
611 insn->bits3.gen6_dp.binding_table_index = binding_table_index;
612 insn->bits3.gen6_dp.msg_control = msg_control;
613 insn->bits3.gen6_dp.last_render_target = 0;
614 insn->bits3.gen6_dp.msg_type = msg_type;
615 insn->bits3.gen6_dp.send_commit_msg = 0;
616 } else if (p->gen >= 050) {
617 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
618 insn->bits3.dp_read_gen5.msg_control = msg_control;
619 insn->bits3.dp_read_gen5.msg_type = msg_type;
620 insn->bits3.dp_read_gen5.target_cache = target_cache;
621 } else if (p->gen >= 045) {
622 insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
623 insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
624 insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
625 insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
626 } else {
627 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
628 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
629 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
630 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
631 }
632 }
633
brw_set_sampler_message(struct brw_compile * p,struct brw_instruction * insn,unsigned binding_table_index,unsigned sampler,unsigned msg_type,unsigned response_length,unsigned msg_length,bool header_present,unsigned simd_mode)634 static void brw_set_sampler_message(struct brw_compile *p,
635 struct brw_instruction *insn,
636 unsigned binding_table_index,
637 unsigned sampler,
638 unsigned msg_type,
639 unsigned response_length,
640 unsigned msg_length,
641 bool header_present,
642 unsigned simd_mode)
643 {
644 brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER,
645 msg_length, response_length,
646 header_present, false);
647
648 if (p->gen >= 070) {
649 insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
650 insn->bits3.sampler_gen7.sampler = sampler;
651 insn->bits3.sampler_gen7.msg_type = msg_type;
652 insn->bits3.sampler_gen7.simd_mode = simd_mode;
653 } else if (p->gen >= 050) {
654 insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
655 insn->bits3.sampler_gen5.sampler = sampler;
656 insn->bits3.sampler_gen5.msg_type = msg_type;
657 insn->bits3.sampler_gen5.simd_mode = simd_mode;
658 } else if (p->gen >= 045) {
659 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
660 insn->bits3.sampler_g4x.sampler = sampler;
661 insn->bits3.sampler_g4x.msg_type = msg_type;
662 } else {
663 insn->bits3.sampler.binding_table_index = binding_table_index;
664 insn->bits3.sampler.sampler = sampler;
665 insn->bits3.sampler.msg_type = msg_type;
666 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
667 }
668 }
669
670
brw_NOP(struct brw_compile * p)671 void brw_NOP(struct brw_compile *p)
672 {
673 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP);
674 brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
675 brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
676 brw_set_src1(p, insn, brw_imm_ud(0x0));
677 }
678
679 /***********************************************************************
680 * Comparisons, if/else/endif
681 */
682
683 static void
push_if_stack(struct brw_compile * p,struct brw_instruction * inst)684 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
685 {
686 p->if_stack[p->if_stack_depth] = inst;
687
688 p->if_stack_depth++;
689 if (p->if_stack_array_size <= p->if_stack_depth) {
690 p->if_stack_array_size *= 2;
691 p->if_stack = realloc(p->if_stack, sizeof(struct brw_instruction *)*p->if_stack_array_size);
692 }
693 }
694
695 /* EU takes the value from the flag register and pushes it onto some
696 * sort of a stack (presumably merging with any flag value already on
697 * the stack). Within an if block, the flags at the top of the stack
698 * control execution on each channel of the unit, eg. on each of the
699 * 16 pixel values in our wm programs.
700 *
701 * When the matching 'else' instruction is reached (presumably by
702 * countdown of the instruction count patched in by our ELSE/ENDIF
703 * functions), the relevant flags are inverted.
704 *
705 * When the matching 'endif' instruction is reached, the flags are
706 * popped off. If the stack is now empty, normal execution resumes.
707 */
708 struct brw_instruction *
brw_IF(struct brw_compile * p,unsigned execute_size)709 brw_IF(struct brw_compile *p, unsigned execute_size)
710 {
711 struct brw_instruction *insn;
712
713 insn = brw_next_insn(p, BRW_OPCODE_IF);
714
715 /* Override the defaults for this instruction: */
716 if (p->gen < 060) {
717 brw_set_dest(p, insn, brw_ip_reg());
718 brw_set_src0(p, insn, brw_ip_reg());
719 brw_set_src1(p, insn, brw_imm_d(0x0));
720 } else if (p->gen < 070) {
721 brw_set_dest(p, insn, brw_imm_w(0));
722 insn->bits1.branch_gen6.jump_count = 0;
723 brw_set_src0(p, insn, __retype_d(brw_null_reg()));
724 brw_set_src1(p, insn, __retype_d(brw_null_reg()));
725 } else {
726 brw_set_dest(p, insn, __retype_d(brw_null_reg()));
727 brw_set_src0(p, insn, __retype_d(brw_null_reg()));
728 brw_set_src1(p, insn, brw_imm_ud(0));
729 insn->bits3.break_cont.jip = 0;
730 insn->bits3.break_cont.uip = 0;
731 }
732
733 insn->header.execution_size = execute_size;
734 insn->header.compression_control = BRW_COMPRESSION_NONE;
735 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
736 insn->header.mask_control = BRW_MASK_ENABLE;
737 if (!p->single_program_flow)
738 insn->header.thread_control = BRW_THREAD_SWITCH;
739
740 p->current->header.predicate_control = BRW_PREDICATE_NONE;
741
742 push_if_stack(p, insn);
743 return insn;
744 }
745
746 /* This function is only used for gen6-style IF instructions with an
747 * embedded comparison (conditional modifier). It is not used on gen7.
748 */
749 struct brw_instruction *
gen6_IF(struct brw_compile * p,uint32_t conditional,struct brw_reg src0,struct brw_reg src1)750 gen6_IF(struct brw_compile *p, uint32_t conditional,
751 struct brw_reg src0, struct brw_reg src1)
752 {
753 struct brw_instruction *insn;
754
755 insn = brw_next_insn(p, BRW_OPCODE_IF);
756
757 brw_set_dest(p, insn, brw_imm_w(0));
758 if (p->compressed) {
759 insn->header.execution_size = BRW_EXECUTE_16;
760 } else {
761 insn->header.execution_size = BRW_EXECUTE_8;
762 }
763 insn->bits1.branch_gen6.jump_count = 0;
764 brw_set_src0(p, insn, src0);
765 brw_set_src1(p, insn, src1);
766
767 assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
768 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
769 insn->header.destreg__conditionalmod = conditional;
770
771 if (!p->single_program_flow)
772 insn->header.thread_control = BRW_THREAD_SWITCH;
773
774 push_if_stack(p, insn);
775 return insn;
776 }
777
778 /**
779 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
780 */
781 static void
convert_IF_ELSE_to_ADD(struct brw_compile * p,struct brw_instruction * if_inst,struct brw_instruction * else_inst)782 convert_IF_ELSE_to_ADD(struct brw_compile *p,
783 struct brw_instruction *if_inst,
784 struct brw_instruction *else_inst)
785 {
786 /* The next instruction (where the ENDIF would be, if it existed) */
787 struct brw_instruction *next_inst = &p->store[p->nr_insn];
788
789 assert(p->single_program_flow);
790 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
791 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
792 assert(if_inst->header.execution_size == BRW_EXECUTE_1);
793
794 /* Convert IF to an ADD instruction that moves the instruction pointer
795 * to the first instruction of the ELSE block. If there is no ELSE
796 * block, point to where ENDIF would be. Reverse the predicate.
797 *
798 * There's no need to execute an ENDIF since we don't need to do any
799 * stack operations, and if we're currently executing, we just want to
800 * continue normally.
801 */
802 if_inst->header.opcode = BRW_OPCODE_ADD;
803 if_inst->header.predicate_inverse = 1;
804
805 if (else_inst != NULL) {
806 /* Convert ELSE to an ADD instruction that points where the ENDIF
807 * would be.
808 */
809 else_inst->header.opcode = BRW_OPCODE_ADD;
810
811 if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
812 else_inst->bits3.ud = (next_inst - else_inst) * 16;
813 } else {
814 if_inst->bits3.ud = (next_inst - if_inst) * 16;
815 }
816 }
817
818 /**
819 * Patch IF and ELSE instructions with appropriate jump targets.
820 */
821 static void
patch_IF_ELSE(struct brw_compile * p,struct brw_instruction * if_inst,struct brw_instruction * else_inst,struct brw_instruction * endif_inst)822 patch_IF_ELSE(struct brw_compile *p,
823 struct brw_instruction *if_inst,
824 struct brw_instruction *else_inst,
825 struct brw_instruction *endif_inst)
826 {
827 unsigned br = 1;
828
829 assert(!p->single_program_flow);
830 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
831 assert(endif_inst != NULL);
832 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
833
834 /* Jump count is for 64bit data chunk each, so one 128bit instruction
835 * requires 2 chunks.
836 */
837 if (p->gen >= 050)
838 br = 2;
839
840 assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
841 endif_inst->header.execution_size = if_inst->header.execution_size;
842
843 if (else_inst == NULL) {
844 /* Patch IF -> ENDIF */
845 if (p->gen < 060) {
846 /* Turn it into an IFF, which means no mask stack operations for
847 * all-false and jumping past the ENDIF.
848 */
849 if_inst->header.opcode = BRW_OPCODE_IFF;
850 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
851 if_inst->bits3.if_else.pop_count = 0;
852 if_inst->bits3.if_else.pad0 = 0;
853 } else if (p->gen < 070) {
854 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
855 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
856 } else {
857 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
858 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
859 }
860 } else {
861 else_inst->header.execution_size = if_inst->header.execution_size;
862
863 /* Patch IF -> ELSE */
864 if (p->gen < 060) {
865 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
866 if_inst->bits3.if_else.pop_count = 0;
867 if_inst->bits3.if_else.pad0 = 0;
868 } else if (p->gen <= 070) {
869 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
870 }
871
872 /* Patch ELSE -> ENDIF */
873 if (p->gen < 060) {
874 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
875 * matching ENDIF.
876 */
877 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
878 else_inst->bits3.if_else.pop_count = 1;
879 else_inst->bits3.if_else.pad0 = 0;
880 } else if (p->gen < 070) {
881 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
882 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
883 } else {
884 /* The IF instruction's JIP should point just past the ELSE */
885 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
886 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
887 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
888 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
889 }
890 }
891 }
892
893 void
brw_ELSE(struct brw_compile * p)894 brw_ELSE(struct brw_compile *p)
895 {
896 struct brw_instruction *insn;
897
898 insn = brw_next_insn(p, BRW_OPCODE_ELSE);
899
900 if (p->gen < 060) {
901 brw_set_dest(p, insn, brw_ip_reg());
902 brw_set_src0(p, insn, brw_ip_reg());
903 brw_set_src1(p, insn, brw_imm_d(0x0));
904 } else if (p->gen < 070) {
905 brw_set_dest(p, insn, brw_imm_w(0));
906 insn->bits1.branch_gen6.jump_count = 0;
907 brw_set_src0(p, insn, __retype_d(brw_null_reg()));
908 brw_set_src1(p, insn, __retype_d(brw_null_reg()));
909 } else {
910 brw_set_dest(p, insn, __retype_d(brw_null_reg()));
911 brw_set_src0(p, insn, __retype_d(brw_null_reg()));
912 brw_set_src1(p, insn, brw_imm_ud(0));
913 insn->bits3.break_cont.jip = 0;
914 insn->bits3.break_cont.uip = 0;
915 }
916
917 insn->header.compression_control = BRW_COMPRESSION_NONE;
918 insn->header.mask_control = BRW_MASK_ENABLE;
919 if (!p->single_program_flow)
920 insn->header.thread_control = BRW_THREAD_SWITCH;
921
922 push_if_stack(p, insn);
923 }
924
925 void
brw_ENDIF(struct brw_compile * p)926 brw_ENDIF(struct brw_compile *p)
927 {
928 struct brw_instruction *insn;
929 struct brw_instruction *else_inst = NULL;
930 struct brw_instruction *if_inst = NULL;
931
932 /* Pop the IF and (optional) ELSE instructions from the stack */
933 p->if_stack_depth--;
934 if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
935 else_inst = p->if_stack[p->if_stack_depth];
936 p->if_stack_depth--;
937 }
938 if_inst = p->if_stack[p->if_stack_depth];
939
940 if (p->single_program_flow) {
941 /* ENDIF is useless; don't bother emitting it. */
942 convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
943 return;
944 }
945
946 insn = brw_next_insn(p, BRW_OPCODE_ENDIF);
947
948 if (p->gen < 060) {
949 brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
950 brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
951 brw_set_src1(p, insn, brw_imm_d(0x0));
952 } else if (p->gen < 070) {
953 brw_set_dest(p, insn, brw_imm_w(0));
954 brw_set_src0(p, insn, __retype_d(brw_null_reg()));
955 brw_set_src1(p, insn, __retype_d(brw_null_reg()));
956 } else {
957 brw_set_dest(p, insn, __retype_d(brw_null_reg()));
958 brw_set_src0(p, insn, __retype_d(brw_null_reg()));
959 brw_set_src1(p, insn, brw_imm_ud(0));
960 }
961
962 insn->header.compression_control = BRW_COMPRESSION_NONE;
963 insn->header.mask_control = BRW_MASK_ENABLE;
964 insn->header.thread_control = BRW_THREAD_SWITCH;
965
966 /* Also pop item off the stack in the endif instruction: */
967 if (p->gen < 060) {
968 insn->bits3.if_else.jump_count = 0;
969 insn->bits3.if_else.pop_count = 1;
970 insn->bits3.if_else.pad0 = 0;
971 } else if (p->gen < 070) {
972 insn->bits1.branch_gen6.jump_count = 2;
973 } else {
974 insn->bits3.break_cont.jip = 2;
975 }
976 patch_IF_ELSE(p, if_inst, else_inst, insn);
977 }
978
brw_BREAK(struct brw_compile * p,int pop_count)979 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
980 {
981 struct brw_instruction *insn;
982
983 insn = brw_next_insn(p, BRW_OPCODE_BREAK);
984 if (p->gen >= 060) {
985 brw_set_dest(p, insn, __retype_d(brw_null_reg()));
986 brw_set_src0(p, insn, __retype_d(brw_null_reg()));
987 brw_set_src1(p, insn, brw_imm_d(0x0));
988 } else {
989 brw_set_dest(p, insn, brw_ip_reg());
990 brw_set_src0(p, insn, brw_ip_reg());
991 brw_set_src1(p, insn, brw_imm_d(0x0));
992 insn->bits3.if_else.pad0 = 0;
993 insn->bits3.if_else.pop_count = pop_count;
994 }
995 insn->header.compression_control = BRW_COMPRESSION_NONE;
996 insn->header.execution_size = BRW_EXECUTE_8;
997
998 return insn;
999 }
1000
gen6_CONT(struct brw_compile * p,struct brw_instruction * do_insn)1001 struct brw_instruction *gen6_CONT(struct brw_compile *p,
1002 struct brw_instruction *do_insn)
1003 {
1004 struct brw_instruction *insn;
1005
1006 insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
1007 brw_set_dest(p, insn, __retype_d(brw_null_reg()));
1008 brw_set_src0(p, insn, __retype_d(brw_null_reg()));
1009 brw_set_dest(p, insn, brw_ip_reg());
1010 brw_set_src0(p, insn, brw_ip_reg());
1011 brw_set_src1(p, insn, brw_imm_d(0x0));
1012
1013 insn->header.compression_control = BRW_COMPRESSION_NONE;
1014 insn->header.execution_size = BRW_EXECUTE_8;
1015 return insn;
1016 }
1017
brw_CONT(struct brw_compile * p,int pop_count)1018 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1019 {
1020 struct brw_instruction *insn;
1021 insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
1022 brw_set_dest(p, insn, brw_ip_reg());
1023 brw_set_src0(p, insn, brw_ip_reg());
1024 brw_set_src1(p, insn, brw_imm_d(0x0));
1025 insn->header.compression_control = BRW_COMPRESSION_NONE;
1026 insn->header.execution_size = BRW_EXECUTE_8;
1027 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1028 insn->bits3.if_else.pad0 = 0;
1029 insn->bits3.if_else.pop_count = pop_count;
1030 return insn;
1031 }
1032
1033 /* DO/WHILE loop:
1034 *
1035 * The DO/WHILE is just an unterminated loop -- break or continue are
1036 * used for control within the loop. We have a few ways they can be
1037 * done.
1038 *
1039 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1040 * jip and no DO instruction.
1041 *
1042 * For non-uniform control flow pre-gen6, there's a DO instruction to
1043 * push the mask, and a WHILE to jump back, and BREAK to get out and
1044 * pop the mask.
1045 *
1046 * For gen6, there's no more mask stack, so no need for DO. WHILE
1047 * just points back to the first instruction of the loop.
1048 */
brw_DO(struct brw_compile * p,unsigned execute_size)1049 struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
1050 {
1051 if (p->gen >= 060 || p->single_program_flow) {
1052 return &p->store[p->nr_insn];
1053 } else {
1054 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO);
1055
1056 /* Override the defaults for this instruction:
1057 */
1058 brw_set_dest(p, insn, brw_null_reg());
1059 brw_set_src0(p, insn, brw_null_reg());
1060 brw_set_src1(p, insn, brw_null_reg());
1061
1062 insn->header.compression_control = BRW_COMPRESSION_NONE;
1063 insn->header.execution_size = execute_size;
1064 insn->header.predicate_control = BRW_PREDICATE_NONE;
1065 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1066 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1067
1068 return insn;
1069 }
1070 }
1071
brw_WHILE(struct brw_compile * p,struct brw_instruction * do_insn)1072 struct brw_instruction *brw_WHILE(struct brw_compile *p,
1073 struct brw_instruction *do_insn)
1074 {
1075 struct brw_instruction *insn;
1076 unsigned br = 1;
1077
1078 if (p->gen >= 050)
1079 br = 2;
1080
1081 if (p->gen >= 070) {
1082 insn = brw_next_insn(p, BRW_OPCODE_WHILE);
1083
1084 brw_set_dest(p, insn, __retype_d(brw_null_reg()));
1085 brw_set_src0(p, insn, __retype_d(brw_null_reg()));
1086 brw_set_src1(p, insn, brw_imm_ud(0));
1087 insn->bits3.break_cont.jip = br * (do_insn - insn);
1088
1089 insn->header.execution_size = BRW_EXECUTE_8;
1090 } else if (p->gen >= 060) {
1091 insn = brw_next_insn(p, BRW_OPCODE_WHILE);
1092
1093 brw_set_dest(p, insn, brw_imm_w(0));
1094 insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
1095 brw_set_src0(p, insn, __retype_d(brw_null_reg()));
1096 brw_set_src1(p, insn, __retype_d(brw_null_reg()));
1097
1098 insn->header.execution_size = BRW_EXECUTE_8;
1099 } else {
1100 if (p->single_program_flow) {
1101 insn = brw_next_insn(p, BRW_OPCODE_ADD);
1102
1103 brw_set_dest(p, insn, brw_ip_reg());
1104 brw_set_src0(p, insn, brw_ip_reg());
1105 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
1106 insn->header.execution_size = BRW_EXECUTE_1;
1107 } else {
1108 insn = brw_next_insn(p, BRW_OPCODE_WHILE);
1109
1110 assert(do_insn->header.opcode == BRW_OPCODE_DO);
1111
1112 brw_set_dest(p, insn, brw_ip_reg());
1113 brw_set_src0(p, insn, brw_ip_reg());
1114 brw_set_src1(p, insn, brw_imm_d(0));
1115
1116 insn->header.execution_size = do_insn->header.execution_size;
1117 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1118 insn->bits3.if_else.pop_count = 0;
1119 insn->bits3.if_else.pad0 = 0;
1120 }
1121 }
1122 insn->header.compression_control = BRW_COMPRESSION_NONE;
1123 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1124
1125 return insn;
1126 }
1127
1128 /* FORWARD JUMPS:
1129 */
brw_land_fwd_jump(struct brw_compile * p,struct brw_instruction * jmp_insn)1130 void brw_land_fwd_jump(struct brw_compile *p,
1131 struct brw_instruction *jmp_insn)
1132 {
1133 struct brw_instruction *landing = &p->store[p->nr_insn];
1134 unsigned jmpi = 1;
1135
1136 if (p->gen >= 050)
1137 jmpi = 2;
1138
1139 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1140 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1141
1142 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1143 }
1144
1145
1146
1147 /* To integrate with the above, it makes sense that the comparison
1148 * instruction should populate the flag register. It might be simpler
1149 * just to use the flag reg for most WM tasks?
1150 */
brw_CMP(struct brw_compile * p,struct brw_reg dest,unsigned conditional,struct brw_reg src0,struct brw_reg src1)1151 void brw_CMP(struct brw_compile *p,
1152 struct brw_reg dest,
1153 unsigned conditional,
1154 struct brw_reg src0,
1155 struct brw_reg src1)
1156 {
1157 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_CMP);
1158
1159 insn->header.destreg__conditionalmod = conditional;
1160 brw_set_dest(p, insn, dest);
1161 brw_set_src0(p, insn, src0);
1162 brw_set_src1(p, insn, src1);
1163
1164 /* Make it so that future instructions will use the computed flag
1165 * value until brw_set_predicate_control_flag_value() is called
1166 * again.
1167 */
1168 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1169 dest.nr == 0) {
1170 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1171 p->flag_value = 0xff;
1172 }
1173 }
1174
1175 /* Issue 'wait' instruction for n1, host could program MMIO
1176 to wake up thread. */
brw_WAIT(struct brw_compile * p)1177 void brw_WAIT(struct brw_compile *p)
1178 {
1179 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT);
1180 struct brw_reg src = brw_notification_1_reg();
1181
1182 brw_set_dest(p, insn, src);
1183 brw_set_src0(p, insn, src);
1184 brw_set_src1(p, insn, brw_null_reg());
1185 insn->header.execution_size = 0; /* must */
1186 insn->header.predicate_control = 0;
1187 insn->header.compression_control = 0;
1188 }
1189
1190 /***********************************************************************
1191 * Helpers for the various SEND message types:
1192 */
1193
1194 /** Extended math function, float[8].
1195 */
brw_math(struct brw_compile * p,struct brw_reg dest,unsigned function,unsigned saturate,unsigned msg_reg_nr,struct brw_reg src,unsigned data_type,unsigned precision)1196 void brw_math(struct brw_compile *p,
1197 struct brw_reg dest,
1198 unsigned function,
1199 unsigned saturate,
1200 unsigned msg_reg_nr,
1201 struct brw_reg src,
1202 unsigned data_type,
1203 unsigned precision)
1204 {
1205 if (p->gen >= 060) {
1206 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
1207
1208 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1209 assert(src.file == BRW_GENERAL_REGISTER_FILE);
1210
1211 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1212 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1213
1214 /* Source modifiers are ignored for extended math instructions. */
1215 assert(!src.negate);
1216 assert(!src.abs);
1217
1218 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1219 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1220 assert(src.type == BRW_REGISTER_TYPE_F);
1221 }
1222
1223 /* Math is the same ISA format as other opcodes, except that CondModifier
1224 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1225 */
1226 insn->header.destreg__conditionalmod = function;
1227 insn->header.saturate = saturate;
1228
1229 brw_set_dest(p, insn, dest);
1230 brw_set_src0(p, insn, src);
1231 brw_set_src1(p, insn, brw_null_reg());
1232 } else {
1233 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
1234 /* Example code doesn't set predicate_control for send
1235 * instructions.
1236 */
1237 insn->header.predicate_control = 0;
1238 insn->header.destreg__conditionalmod = msg_reg_nr;
1239
1240 brw_set_dest(p, insn, dest);
1241 brw_set_src0(p, insn, src);
1242 brw_set_math_message(p, insn, function,
1243 src.type == BRW_REGISTER_TYPE_D,
1244 precision,
1245 saturate,
1246 data_type);
1247 }
1248 }
1249
1250 /** Extended math function, float[8].
1251 */
brw_math2(struct brw_compile * p,struct brw_reg dest,unsigned function,struct brw_reg src0,struct brw_reg src1)1252 void brw_math2(struct brw_compile *p,
1253 struct brw_reg dest,
1254 unsigned function,
1255 struct brw_reg src0,
1256 struct brw_reg src1)
1257 {
1258 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
1259
1260 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1261 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1262 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1263
1264 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1265 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1266 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1267
1268 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1269 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1270 assert(src0.type == BRW_REGISTER_TYPE_F);
1271 assert(src1.type == BRW_REGISTER_TYPE_F);
1272 }
1273
1274 /* Source modifiers are ignored for extended math instructions. */
1275 assert(!src0.negate);
1276 assert(!src0.abs);
1277 assert(!src1.negate);
1278 assert(!src1.abs);
1279
1280 /* Math is the same ISA format as other opcodes, except that CondModifier
1281 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1282 */
1283 insn->header.destreg__conditionalmod = function;
1284
1285 brw_set_dest(p, insn, dest);
1286 brw_set_src0(p, insn, src0);
1287 brw_set_src1(p, insn, src1);
1288 }
1289
1290 /**
1291 * Extended math function, float[16].
1292 * Use 2 send instructions.
1293 */
brw_math_16(struct brw_compile * p,struct brw_reg dest,unsigned function,unsigned saturate,unsigned msg_reg_nr,struct brw_reg src,unsigned precision)1294 void brw_math_16(struct brw_compile *p,
1295 struct brw_reg dest,
1296 unsigned function,
1297 unsigned saturate,
1298 unsigned msg_reg_nr,
1299 struct brw_reg src,
1300 unsigned precision)
1301 {
1302 struct brw_instruction *insn;
1303
1304 if (p->gen >= 060) {
1305 insn = brw_next_insn(p, BRW_OPCODE_MATH);
1306
1307 /* Math is the same ISA format as other opcodes, except that CondModifier
1308 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1309 */
1310 insn->header.destreg__conditionalmod = function;
1311 insn->header.saturate = saturate;
1312
1313 /* Source modifiers are ignored for extended math instructions. */
1314 assert(!src.negate);
1315 assert(!src.abs);
1316
1317 brw_set_dest(p, insn, dest);
1318 brw_set_src0(p, insn, src);
1319 brw_set_src1(p, insn, brw_null_reg());
1320 return;
1321 }
1322
1323 /* First instruction:
1324 */
1325 brw_push_insn_state(p);
1326 brw_set_predicate_control_flag_value(p, 0xff);
1327 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1328
1329 insn = brw_next_insn(p, BRW_OPCODE_SEND);
1330 insn->header.destreg__conditionalmod = msg_reg_nr;
1331
1332 brw_set_dest(p, insn, dest);
1333 brw_set_src0(p, insn, src);
1334 brw_set_math_message(p, insn, function,
1335 BRW_MATH_INTEGER_UNSIGNED,
1336 precision,
1337 saturate,
1338 BRW_MATH_DATA_VECTOR);
1339
1340 /* Second instruction:
1341 */
1342 insn = brw_next_insn(p, BRW_OPCODE_SEND);
1343 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1344 insn->header.destreg__conditionalmod = msg_reg_nr+1;
1345
1346 brw_set_dest(p, insn, __offset(dest,1));
1347 brw_set_src0(p, insn, src);
1348 brw_set_math_message(p, insn, function,
1349 BRW_MATH_INTEGER_UNSIGNED,
1350 precision,
1351 saturate,
1352 BRW_MATH_DATA_VECTOR);
1353
1354 brw_pop_insn_state(p);
1355 }
1356
1357 /**
1358 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1359 * using a constant offset per channel.
1360 *
1361 * The offset must be aligned to oword size (16 bytes). Used for
1362 * register spilling.
1363 */
brw_oword_block_write_scratch(struct brw_compile * p,struct brw_reg mrf,int num_regs,unsigned offset)1364 void brw_oword_block_write_scratch(struct brw_compile *p,
1365 struct brw_reg mrf,
1366 int num_regs,
1367 unsigned offset)
1368 {
1369 uint32_t msg_control, msg_type;
1370 int mlen;
1371
1372 if (p->gen >= 060)
1373 offset /= 16;
1374
1375 mrf = __retype_ud(mrf);
1376
1377 if (num_regs == 1) {
1378 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1379 mlen = 2;
1380 } else {
1381 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1382 mlen = 3;
1383 }
1384
1385 /* Set up the message header. This is g0, with g0.2 filled with
1386 * the offset. We don't want to leave our offset around in g0 or
1387 * it'll screw up texture samples, so set it up inside the message
1388 * reg.
1389 */
1390 {
1391 brw_push_insn_state(p);
1392 brw_set_mask_control(p, BRW_MASK_DISABLE);
1393 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1394
1395 brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1396
1397 /* set message header global offset field (reg 0, element 2) */
1398 brw_MOV(p,
1399 __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
1400 brw_imm_ud(offset));
1401
1402 brw_pop_insn_state(p);
1403 }
1404
1405 {
1406 struct brw_reg dest;
1407 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
1408 int send_commit_msg;
1409 struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0));
1410
1411 if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
1412 insn->header.compression_control = BRW_COMPRESSION_NONE;
1413 src_header = vec16(src_header);
1414 }
1415 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1416 insn->header.destreg__conditionalmod = mrf.nr;
1417
1418 /* Until gen6, writes followed by reads from the same location
1419 * are not guaranteed to be ordered unless write_commit is set.
1420 * If set, then a no-op write is issued to the destination
1421 * register to set a dependency, and a read from the destination
1422 * can be used to ensure the ordering.
1423 *
1424 * For gen6, only writes between different threads need ordering
1425 * protection. Our use of DP writes is all about register
1426 * spilling within a thread.
1427 */
1428 if (p->gen >= 060) {
1429 dest = __retype_uw(vec16(brw_null_reg()));
1430 send_commit_msg = 0;
1431 } else {
1432 dest = src_header;
1433 send_commit_msg = 1;
1434 }
1435
1436 brw_set_dest(p, insn, dest);
1437 if (p->gen >= 060) {
1438 brw_set_src0(p, insn, mrf);
1439 } else {
1440 brw_set_src0(p, insn, brw_null_reg());
1441 }
1442
1443 if (p->gen >= 060)
1444 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1445 else
1446 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1447
1448 brw_set_dp_write_message(p,
1449 insn,
1450 255, /* binding table index (255=stateless) */
1451 msg_control,
1452 msg_type,
1453 mlen,
1454 true, /* header_present */
1455 0, /* pixel scoreboard */
1456 send_commit_msg, /* response_length */
1457 0, /* eot */
1458 send_commit_msg);
1459 }
1460 }
1461
1462
1463 /**
1464 * Read a block of owords (half a GRF each) from the scratch buffer
1465 * using a constant index per channel.
1466 *
1467 * Offset must be aligned to oword size (16 bytes). Used for register
1468 * spilling.
1469 */
1470 void
brw_oword_block_read_scratch(struct brw_compile * p,struct brw_reg dest,struct brw_reg mrf,int num_regs,unsigned offset)1471 brw_oword_block_read_scratch(struct brw_compile *p,
1472 struct brw_reg dest,
1473 struct brw_reg mrf,
1474 int num_regs,
1475 unsigned offset)
1476 {
1477 uint32_t msg_control;
1478 int rlen;
1479
1480 if (p->gen >= 060)
1481 offset /= 16;
1482
1483 mrf = __retype_ud(mrf);
1484 dest = __retype_uw(dest);
1485
1486 if (num_regs == 1) {
1487 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1488 rlen = 1;
1489 } else {
1490 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1491 rlen = 2;
1492 }
1493
1494 {
1495 brw_push_insn_state(p);
1496 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1497 brw_set_mask_control(p, BRW_MASK_DISABLE);
1498
1499 brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1500
1501 /* set message header global offset field (reg 0, element 2) */
1502 brw_MOV(p,
1503 __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
1504 brw_imm_ud(offset));
1505
1506 brw_pop_insn_state(p);
1507 }
1508
1509 {
1510 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
1511
1512 assert(insn->header.predicate_control == 0);
1513 insn->header.compression_control = BRW_COMPRESSION_NONE;
1514 insn->header.destreg__conditionalmod = mrf.nr;
1515
1516 brw_set_dest(p, insn, dest); /* UW? */
1517 if (p->gen >= 060) {
1518 brw_set_src0(p, insn, mrf);
1519 } else {
1520 brw_set_src0(p, insn, brw_null_reg());
1521 }
1522
1523 brw_set_dp_read_message(p,
1524 insn,
1525 255, /* binding table index (255=stateless) */
1526 msg_control,
1527 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1528 BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1529 1, /* msg_length */
1530 rlen);
1531 }
1532 }
1533
1534 /**
1535 * Read a float[4] vector from the data port Data Cache (const buffer).
1536 * Location (in buffer) should be a multiple of 16.
1537 * Used for fetching shader constants.
1538 */
brw_oword_block_read(struct brw_compile * p,struct brw_reg dest,struct brw_reg mrf,uint32_t offset,uint32_t bind_table_index)1539 void brw_oword_block_read(struct brw_compile *p,
1540 struct brw_reg dest,
1541 struct brw_reg mrf,
1542 uint32_t offset,
1543 uint32_t bind_table_index)
1544 {
1545 struct brw_instruction *insn;
1546
1547 /* On newer hardware, offset is in units of owords. */
1548 if (p->gen >= 060)
1549 offset /= 16;
1550
1551 mrf = __retype_ud(mrf);
1552
1553 brw_push_insn_state(p);
1554 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1555 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1556 brw_set_mask_control(p, BRW_MASK_DISABLE);
1557
1558 brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1559
1560 /* set message header global offset field (reg 0, element 2) */
1561 brw_MOV(p,
1562 __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
1563 brw_imm_ud(offset));
1564
1565 insn = brw_next_insn(p, BRW_OPCODE_SEND);
1566 insn->header.destreg__conditionalmod = mrf.nr;
1567
1568 /* cast dest to a uword[8] vector */
1569 dest = __retype_uw(vec8(dest));
1570
1571 brw_set_dest(p, insn, dest);
1572 if (p->gen >= 060) {
1573 brw_set_src0(p, insn, mrf);
1574 } else {
1575 brw_set_src0(p, insn, brw_null_reg());
1576 }
1577
1578 brw_set_dp_read_message(p,
1579 insn,
1580 bind_table_index,
1581 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
1582 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
1583 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1584 1, /* msg_length */
1585 1); /* response_length (1 reg, 2 owords!) */
1586
1587 brw_pop_insn_state(p);
1588 }
1589
1590 /**
1591 * Read a set of dwords from the data port Data Cache (const buffer).
1592 *
1593 * Location (in buffer) appears as UD offsets in the register after
1594 * the provided mrf header reg.
1595 */
brw_dword_scattered_read(struct brw_compile * p,struct brw_reg dest,struct brw_reg mrf,uint32_t bind_table_index)1596 void brw_dword_scattered_read(struct brw_compile *p,
1597 struct brw_reg dest,
1598 struct brw_reg mrf,
1599 uint32_t bind_table_index)
1600 {
1601 struct brw_instruction *insn;
1602
1603 mrf = __retype_ud(mrf);
1604
1605 brw_push_insn_state(p);
1606 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1607 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1608 brw_set_mask_control(p, BRW_MASK_DISABLE);
1609 brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1610 brw_pop_insn_state(p);
1611
1612 insn = brw_next_insn(p, BRW_OPCODE_SEND);
1613 insn->header.destreg__conditionalmod = mrf.nr;
1614
1615 /* cast dest to a uword[8] vector */
1616 dest = __retype_uw(vec8(dest));
1617
1618 brw_set_dest(p, insn, dest);
1619 brw_set_src0(p, insn, brw_null_reg());
1620
1621 brw_set_dp_read_message(p,
1622 insn,
1623 bind_table_index,
1624 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
1625 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
1626 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1627 2, /* msg_length */
1628 1); /* response_length */
1629 }
1630
1631 /**
1632 * Read float[4] constant(s) from VS constant buffer.
1633 * For relative addressing, two float[4] constants will be read into 'dest'.
1634 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1635 */
brw_dp_READ_4_vs(struct brw_compile * p,struct brw_reg dest,unsigned location,unsigned bind_table_index)1636 void brw_dp_READ_4_vs(struct brw_compile *p,
1637 struct brw_reg dest,
1638 unsigned location,
1639 unsigned bind_table_index)
1640 {
1641 struct brw_instruction *insn;
1642 unsigned msg_reg_nr = 1;
1643
1644 if (p->gen >= 060)
1645 location /= 16;
1646
1647 /* Setup MRF[1] with location/offset into const buffer */
1648 brw_push_insn_state(p);
1649 brw_set_access_mode(p, BRW_ALIGN_1);
1650 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1651 brw_set_mask_control(p, BRW_MASK_DISABLE);
1652 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1653 brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)),
1654 brw_imm_ud(location));
1655 brw_pop_insn_state(p);
1656
1657 insn = brw_next_insn(p, BRW_OPCODE_SEND);
1658
1659 insn->header.predicate_control = BRW_PREDICATE_NONE;
1660 insn->header.compression_control = BRW_COMPRESSION_NONE;
1661 insn->header.destreg__conditionalmod = msg_reg_nr;
1662 insn->header.mask_control = BRW_MASK_DISABLE;
1663
1664 brw_set_dest(p, insn, dest);
1665 if (p->gen >= 060) {
1666 brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
1667 } else {
1668 brw_set_src0(p, insn, brw_null_reg());
1669 }
1670
1671 brw_set_dp_read_message(p,
1672 insn,
1673 bind_table_index,
1674 0,
1675 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1676 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1677 1, /* msg_length */
1678 1); /* response_length (1 Oword) */
1679 }
1680
1681 /**
1682 * Read a float[4] constant per vertex from VS constant buffer, with
1683 * relative addressing.
1684 */
brw_dp_READ_4_vs_relative(struct brw_compile * p,struct brw_reg dest,struct brw_reg addr_reg,unsigned offset,unsigned bind_table_index)1685 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1686 struct brw_reg dest,
1687 struct brw_reg addr_reg,
1688 unsigned offset,
1689 unsigned bind_table_index)
1690 {
1691 struct brw_reg src = brw_vec8_grf(0, 0);
1692 struct brw_instruction *insn;
1693 int msg_type;
1694
1695 /* Setup MRF[1] with offset into const buffer */
1696 brw_push_insn_state(p);
1697 brw_set_access_mode(p, BRW_ALIGN_1);
1698 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1699 brw_set_mask_control(p, BRW_MASK_DISABLE);
1700 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1701
1702 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1703 * fields ignored.
1704 */
1705 brw_ADD(p, __retype_d(brw_message_reg(1)),
1706 addr_reg, brw_imm_d(offset));
1707 brw_pop_insn_state(p);
1708
1709 gen6_resolve_implied_move(p, &src, 0);
1710
1711 insn = brw_next_insn(p, BRW_OPCODE_SEND);
1712 insn->header.predicate_control = BRW_PREDICATE_NONE;
1713 insn->header.compression_control = BRW_COMPRESSION_NONE;
1714 insn->header.destreg__conditionalmod = 0;
1715 insn->header.mask_control = BRW_MASK_DISABLE;
1716
1717 brw_set_dest(p, insn, dest);
1718 brw_set_src0(p, insn, src);
1719
1720 if (p->gen >= 060)
1721 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1722 else if (p->gen >= 045)
1723 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1724 else
1725 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1726
1727 brw_set_dp_read_message(p,
1728 insn,
1729 bind_table_index,
1730 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1731 msg_type,
1732 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1733 2, /* msg_length */
1734 1); /* response_length */
1735 }
1736
brw_fb_WRITE(struct brw_compile * p,int dispatch_width,unsigned msg_reg_nr,struct brw_reg src0,unsigned msg_control,unsigned binding_table_index,unsigned msg_length,unsigned response_length,bool eot,bool header_present)1737 void brw_fb_WRITE(struct brw_compile *p,
1738 int dispatch_width,
1739 unsigned msg_reg_nr,
1740 struct brw_reg src0,
1741 unsigned msg_control,
1742 unsigned binding_table_index,
1743 unsigned msg_length,
1744 unsigned response_length,
1745 bool eot,
1746 bool header_present)
1747 {
1748 struct brw_instruction *insn;
1749 unsigned msg_type;
1750 struct brw_reg dest;
1751
1752 if (dispatch_width == 16)
1753 dest = __retype_uw(vec16(brw_null_reg()));
1754 else
1755 dest = __retype_uw(vec8(brw_null_reg()));
1756
1757 if (p->gen >= 060 && binding_table_index == 0) {
1758 insn = brw_next_insn(p, BRW_OPCODE_SENDC);
1759 } else {
1760 insn = brw_next_insn(p, BRW_OPCODE_SEND);
1761 }
1762 /* The execution mask is ignored for render target writes. */
1763 insn->header.predicate_control = 0;
1764 insn->header.compression_control = BRW_COMPRESSION_NONE;
1765
1766 if (p->gen >= 060) {
1767 /* headerless version, just submit color payload */
1768 src0 = brw_message_reg(msg_reg_nr);
1769
1770 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1771 } else {
1772 insn->header.destreg__conditionalmod = msg_reg_nr;
1773
1774 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1775 }
1776
1777 brw_set_dest(p, insn, dest);
1778 brw_set_src0(p, insn, src0);
1779 brw_set_dp_write_message(p,
1780 insn,
1781 binding_table_index,
1782 msg_control,
1783 msg_type,
1784 msg_length,
1785 header_present,
1786 eot,
1787 response_length,
1788 eot,
1789 0 /* send_commit_msg */);
1790 }
1791
1792 /**
1793 * Texture sample instruction.
1794 * Note: the msg_type plus msg_length values determine exactly what kind
1795 * of sampling operation is performed. See volume 4, page 161 of docs.
1796 */
brw_SAMPLE(struct brw_compile * p,struct brw_reg dest,unsigned msg_reg_nr,struct brw_reg src0,unsigned binding_table_index,unsigned sampler,unsigned writemask,unsigned msg_type,unsigned response_length,unsigned msg_length,bool header_present,unsigned simd_mode)1797 void brw_SAMPLE(struct brw_compile *p,
1798 struct brw_reg dest,
1799 unsigned msg_reg_nr,
1800 struct brw_reg src0,
1801 unsigned binding_table_index,
1802 unsigned sampler,
1803 unsigned writemask,
1804 unsigned msg_type,
1805 unsigned response_length,
1806 unsigned msg_length,
1807 bool header_present,
1808 unsigned simd_mode)
1809 {
1810 assert(writemask);
1811
1812 if (p->gen < 050 || writemask != WRITEMASK_XYZW) {
1813 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1814
1815 writemask = ~writemask & WRITEMASK_XYZW;
1816
1817 brw_push_insn_state(p);
1818
1819 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1820 brw_set_mask_control(p, BRW_MASK_DISABLE);
1821
1822 brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0)));
1823 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12));
1824
1825 brw_pop_insn_state(p);
1826
1827 src0 = __retype_uw(brw_null_reg());
1828 }
1829
1830 {
1831 struct brw_instruction *insn;
1832
1833 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
1834
1835 insn = brw_next_insn(p, BRW_OPCODE_SEND);
1836 insn->header.predicate_control = 0; /* XXX */
1837 insn->header.compression_control = BRW_COMPRESSION_NONE;
1838 if (p->gen < 060)
1839 insn->header.destreg__conditionalmod = msg_reg_nr;
1840
1841 brw_set_dest(p, insn, dest);
1842 brw_set_src0(p, insn, src0);
1843 brw_set_sampler_message(p, insn,
1844 binding_table_index,
1845 sampler,
1846 msg_type,
1847 response_length,
1848 msg_length,
1849 header_present,
1850 simd_mode);
1851 }
1852 }
1853
1854 /* All these variables are pretty confusing - we might be better off
1855 * using bitmasks and macros for this, in the old style. Or perhaps
1856 * just having the caller instantiate the fields in dword3 itself.
1857 */
brw_urb_WRITE(struct brw_compile * p,struct brw_reg dest,unsigned msg_reg_nr,struct brw_reg src0,bool allocate,bool used,unsigned msg_length,unsigned response_length,bool eot,bool writes_complete,unsigned offset,unsigned swizzle)1858 void brw_urb_WRITE(struct brw_compile *p,
1859 struct brw_reg dest,
1860 unsigned msg_reg_nr,
1861 struct brw_reg src0,
1862 bool allocate,
1863 bool used,
1864 unsigned msg_length,
1865 unsigned response_length,
1866 bool eot,
1867 bool writes_complete,
1868 unsigned offset,
1869 unsigned swizzle)
1870 {
1871 struct brw_instruction *insn;
1872
1873 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
1874
1875 if (p->gen >= 070) {
1876 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
1877 brw_push_insn_state(p);
1878 brw_set_access_mode(p, BRW_ALIGN_1);
1879 brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)),
1880 __retype_ud(brw_vec1_grf(0, 5)),
1881 brw_imm_ud(0xff00));
1882 brw_pop_insn_state(p);
1883 }
1884
1885 insn = brw_next_insn(p, BRW_OPCODE_SEND);
1886
1887 assert(msg_length < BRW_MAX_MRF);
1888
1889 brw_set_dest(p, insn, dest);
1890 brw_set_src0(p, insn, src0);
1891 brw_set_src1(p, insn, brw_imm_d(0));
1892
1893 if (p->gen <= 060)
1894 insn->header.destreg__conditionalmod = msg_reg_nr;
1895
1896 brw_set_urb_message(p,
1897 insn,
1898 allocate,
1899 used,
1900 msg_length,
1901 response_length,
1902 eot,
1903 writes_complete,
1904 offset,
1905 swizzle);
1906 }
1907
1908 static int
brw_find_next_block_end(struct brw_compile * p,int start)1909 brw_find_next_block_end(struct brw_compile *p, int start)
1910 {
1911 int ip;
1912
1913 for (ip = start + 1; ip < p->nr_insn; ip++) {
1914 struct brw_instruction *insn = &p->store[ip];
1915
1916 switch (insn->header.opcode) {
1917 case BRW_OPCODE_ENDIF:
1918 case BRW_OPCODE_ELSE:
1919 case BRW_OPCODE_WHILE:
1920 return ip;
1921 }
1922 }
1923 assert(!"not reached");
1924 return start + 1;
1925 }
1926
1927 /* There is no DO instruction on gen6, so to find the end of the loop
1928 * we have to see if the loop is jumping back before our start
1929 * instruction.
1930 */
1931 static int
brw_find_loop_end(struct brw_compile * p,int start)1932 brw_find_loop_end(struct brw_compile *p, int start)
1933 {
1934 int ip;
1935 int br = 2;
1936
1937 for (ip = start + 1; ip < p->nr_insn; ip++) {
1938 struct brw_instruction *insn = &p->store[ip];
1939
1940 if (insn->header.opcode == BRW_OPCODE_WHILE) {
1941 int jip = p->gen <= 070 ? insn->bits1.branch_gen6.jump_count
1942 : insn->bits3.break_cont.jip;
1943 if (ip + jip / br <= start)
1944 return ip;
1945 }
1946 }
1947 assert(!"not reached");
1948 return start + 1;
1949 }
1950
1951 /* After program generation, go back and update the UIP and JIP of
1952 * BREAK and CONT instructions to their correct locations.
1953 */
1954 void
brw_set_uip_jip(struct brw_compile * p)1955 brw_set_uip_jip(struct brw_compile *p)
1956 {
1957 int ip;
1958 int br = 2;
1959
1960 if (p->gen <= 060)
1961 return;
1962
1963 for (ip = 0; ip < p->nr_insn; ip++) {
1964 struct brw_instruction *insn = &p->store[ip];
1965
1966 switch (insn->header.opcode) {
1967 case BRW_OPCODE_BREAK:
1968 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
1969 /* Gen7 UIP points to WHILE; Gen6 points just after it */
1970 insn->bits3.break_cont.uip =
1971 br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 070 ? 1 : 0));
1972 break;
1973 case BRW_OPCODE_CONTINUE:
1974 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
1975 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
1976
1977 assert(insn->bits3.break_cont.uip != 0);
1978 assert(insn->bits3.break_cont.jip != 0);
1979 break;
1980 }
1981 }
1982 }
1983
brw_ff_sync(struct brw_compile * p,struct brw_reg dest,unsigned msg_reg_nr,struct brw_reg src0,bool allocate,unsigned response_length,bool eot)1984 void brw_ff_sync(struct brw_compile *p,
1985 struct brw_reg dest,
1986 unsigned msg_reg_nr,
1987 struct brw_reg src0,
1988 bool allocate,
1989 unsigned response_length,
1990 bool eot)
1991 {
1992 struct brw_instruction *insn;
1993
1994 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
1995
1996 insn = brw_next_insn(p, BRW_OPCODE_SEND);
1997 brw_set_dest(p, insn, dest);
1998 brw_set_src0(p, insn, src0);
1999 brw_set_src1(p, insn, brw_imm_d(0));
2000
2001 if (p->gen < 060)
2002 insn->header.destreg__conditionalmod = msg_reg_nr;
2003
2004 brw_set_ff_sync_message(p,
2005 insn,
2006 allocate,
2007 response_length,
2008 eot);
2009 }
2010