/*
   Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   develop this 3D driver.

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice (including the
   next paragraph) shall be included in all copies or substantial
   portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
   LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
   OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */
32 #ifndef BRW_EU_H
33 #define BRW_EU_H
34 
35 #include <stdbool.h>
36 #include <stdint.h>
37 #include <stdio.h>
38 
39 #include <assert.h>
40 
/*
 * Swizzle encoding: four 2-bit selectors packed side by side, selector 0
 * in bits 1:0 up to selector 3 in bits 7:6.  The result fits in 8 bits
 * as long as every selector passed in is in the range 0..3; the macros do
 * not mask their arguments.
 */

/** Pack the four selectors a, b, c, d into one swizzle value. */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))

/** Return the 2-bit selector stored at position idx (0..3) of swz. */
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

/* Predefined swizzles; selectors 0..3 are spelled X, Y, Z, W in the names. */
#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
/* Per-channel write-mask bits: one bit each for X, Y, Z, W (bits 0..3). */
#define WRITEMASK_X 0x1
#define WRITEMASK_Y 0x2
#define WRITEMASK_Z 0x4
#define WRITEMASK_W 0x8

/* Combined masks, built by OR-ing the single-channel bits above. */
#define WRITEMASK_XY (WRITEMASK_X | WRITEMASK_Y)
#define WRITEMASK_XYZ (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z)
#define WRITEMASK_XYZW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W)
/** Number of general purpose registers (VS, WM, etc) */
#define BRW_MAX_GRF 128

/** Number of message register file registers */
#define BRW_MAX_MRF 16


/* Access-mode codes (1-bit values). */
#define BRW_ALIGN_1   0
#define BRW_ALIGN_16  1

/* Addressing-mode codes (1-bit values). */
#define BRW_ADDRESS_DIRECT                        0
#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER    1

/* Channel indices; these match the 0..3 selector values used by swizzles. */
#define BRW_CHANNEL_X     0
#define BRW_CHANNEL_Y     1
#define BRW_CHANNEL_Z     2
#define BRW_CHANNEL_W     3
/**
 * Compression-control settings.
 *
 * The numeric values are spelled out so they cannot shift if an entry is
 * ever inserted; they are the same values the implicit numbering produced.
 */
enum brw_compression {
	BRW_COMPRESSION_NONE = 0,
	BRW_COMPRESSION_2NDHALF = 1,
	BRW_COMPRESSION_COMPRESSED = 2,
};
/*
 * Gen6 quarter/half control codes (2-bit values).
 *
 * The "H" (half) names alias the "Q" (quarter) codes: 1H has the same
 * value as 1Q, and 2H has the same value as 3Q.
 */
#define GEN6_COMPRESSION_1Q		0
#define GEN6_COMPRESSION_2Q		1
#define GEN6_COMPRESSION_3Q		2
#define GEN6_COMPRESSION_4Q		3
#define GEN6_COMPRESSION_1H		0
#define GEN6_COMPRESSION_2H		2
/*
 * Conditional-modifier codes.
 *
 * EQ and NEQ are alternate spellings of Z and NZ and share their values.
 */
#define BRW_CONDITIONAL_NONE  0
#define BRW_CONDITIONAL_Z     1
#define BRW_CONDITIONAL_NZ    2
#define BRW_CONDITIONAL_EQ    1	/* Z */
#define BRW_CONDITIONAL_NEQ   2	/* NZ */
#define BRW_CONDITIONAL_G     3
#define BRW_CONDITIONAL_GE    4
#define BRW_CONDITIONAL_L     5
#define BRW_CONDITIONAL_LE    6
#define BRW_CONDITIONAL_R     7
#define BRW_CONDITIONAL_O     8
#define BRW_CONDITIONAL_U     9
/* Debug-control codes. */
#define BRW_DEBUG_NONE        0
#define BRW_DEBUG_BREAKPOINT  1

/* Dependency-control codes (2-bit values). */
#define BRW_DEPENDENCY_NORMAL         0
#define BRW_DEPENDENCY_NOTCLEARED     1
#define BRW_DEPENDENCY_NOTCHECKED     2
#define BRW_DEPENDENCY_DISABLE        3

/* Execution-size codes: a size of N channels is encoded as log2(N). */
#define BRW_EXECUTE_1     0
#define BRW_EXECUTE_2     1
#define BRW_EXECUTE_4     2
#define BRW_EXECUTE_8     3
#define BRW_EXECUTE_16    4
#define BRW_EXECUTE_32    5

/* Horizontal-stride codes: strides 0, 1, 2, 4 map to codes 0..3. */
#define BRW_HORIZONTAL_STRIDE_0   0
#define BRW_HORIZONTAL_STRIDE_1   1
#define BRW_HORIZONTAL_STRIDE_2   2
#define BRW_HORIZONTAL_STRIDE_4   3

/* Saturate-bit values. */
#define BRW_INSTRUCTION_NORMAL    0
#define BRW_INSTRUCTION_SATURATE  1

/* Mask-control values; note that ENABLE is 0 and DISABLE is 1. */
#define BRW_MASK_ENABLE   0
#define BRW_MASK_DISABLE  1
/** @{
 *
 * Gen6 has replaced "mask enable/disable" with WECtrl, which is
 * effectively the same but much simpler to think about.  Now, there
 * are two contributors ANDed together to decide whether channels are
 * executed: the predication on the instruction, and the channel write
 * enable.
 */
/**
 * This is the default value.  It means that a channel's write enable is set
 * if the per-channel IP is pointing at this instruction.
 */
#define BRW_WE_NORMAL		0
/**
 * This is used like BRW_MASK_DISABLE, and causes all channels to have
 * their write enable set.  Note that predication still contributes to
 * whether the channel actually gets written.
 */
#define BRW_WE_ALL		1
/** @} */
/**
 * Instruction opcodes.
 *
 * The first group holds the hardware opcode values; all of them are below
 * 128, so they fit the 7-bit opcode field of struct brw_instruction.  The
 * second group starts at 128, outside that 7-bit range, and holds compiler
 * backend opcodes that are translated into other instructions.
 */
enum opcode {
	/* These are the actual hardware opcodes. */
	BRW_OPCODE_MOV =	1,
	BRW_OPCODE_SEL =	2,
	BRW_OPCODE_NOT =	4,
	BRW_OPCODE_AND =	5,
	BRW_OPCODE_OR =	6,
	BRW_OPCODE_XOR =	7,
	BRW_OPCODE_SHR =	8,
	BRW_OPCODE_SHL =	9,
	BRW_OPCODE_RSR =	10,
	BRW_OPCODE_RSL =	11,
	BRW_OPCODE_ASR =	12,
	BRW_OPCODE_CMP =	16,
	BRW_OPCODE_CMPN =	17,
	BRW_OPCODE_JMPI =	32,
	BRW_OPCODE_IF =	34,
	BRW_OPCODE_IFF =	35,
	BRW_OPCODE_ELSE =	36,
	BRW_OPCODE_ENDIF =	37,
	BRW_OPCODE_DO =	38,
	BRW_OPCODE_WHILE =	39,
	BRW_OPCODE_BREAK =	40,
	BRW_OPCODE_CONTINUE = 41,
	BRW_OPCODE_HALT =	42,
	BRW_OPCODE_MSAVE =	44,
	BRW_OPCODE_MRESTORE = 45,
	BRW_OPCODE_PUSH =	46,
	BRW_OPCODE_POP =	47,
	BRW_OPCODE_WAIT =	48,
	BRW_OPCODE_SEND =	49,
	BRW_OPCODE_SENDC =	50,
	BRW_OPCODE_MATH =	56,
	BRW_OPCODE_ADD =	64,
	BRW_OPCODE_MUL =	65,
	BRW_OPCODE_AVG =	66,
	BRW_OPCODE_FRC =	67,
	BRW_OPCODE_RNDU =	68,
	BRW_OPCODE_RNDD =	69,
	BRW_OPCODE_RNDE =	70,
	BRW_OPCODE_RNDZ =	71,
	BRW_OPCODE_MAC =	72,
	BRW_OPCODE_MACH =	73,
	BRW_OPCODE_LZD =	74,
	BRW_OPCODE_SAD2 =	80,
	BRW_OPCODE_SADA2 =	81,
	BRW_OPCODE_DP4 =	84,
	BRW_OPCODE_DPH =	85,
	BRW_OPCODE_DP3 =	86,
	BRW_OPCODE_DP2 =	87,
	BRW_OPCODE_DPA2 =	88,
	BRW_OPCODE_LINE =	89,
	BRW_OPCODE_PLN =	90,
	BRW_OPCODE_NOP =	126,

	/* These are compiler backend opcodes that get translated into other
	 * instructions.  Only the first is numbered explicitly; the rest
	 * follow sequentially, so their order determines their values.
	 */
	FS_OPCODE_FB_WRITE = 128,
	SHADER_OPCODE_RCP,
	SHADER_OPCODE_RSQ,
	SHADER_OPCODE_SQRT,
	SHADER_OPCODE_EXP2,
	SHADER_OPCODE_LOG2,
	SHADER_OPCODE_POW,
	SHADER_OPCODE_SIN,
	SHADER_OPCODE_COS,
	FS_OPCODE_DDX,
	FS_OPCODE_DDY,
	FS_OPCODE_PIXEL_X,
	FS_OPCODE_PIXEL_Y,
	FS_OPCODE_CINTERP,
	FS_OPCODE_LINTERP,
	FS_OPCODE_TEX,
	FS_OPCODE_TXB,
	FS_OPCODE_TXD,
	FS_OPCODE_TXF,
	FS_OPCODE_TXL,
	FS_OPCODE_TXS,
	FS_OPCODE_DISCARD,
	FS_OPCODE_SPILL,
	FS_OPCODE_UNSPILL,
	FS_OPCODE_PULL_CONSTANT_LOAD,

	VS_OPCODE_URB_WRITE,
	VS_OPCODE_SCRATCH_READ,
	VS_OPCODE_SCRATCH_WRITE,
	VS_OPCODE_PULL_CONSTANT_LOAD,
};
/*
 * Predicate-control codes.
 *
 * NONE and NORMAL are common; codes from 2 upward are reused with
 * different meanings by the ALIGN1 and ALIGN16 name families, so the same
 * number appears under both prefixes.
 */
#define BRW_PREDICATE_NONE             0
#define BRW_PREDICATE_NORMAL           1
#define BRW_PREDICATE_ALIGN1_ANYV             2
#define BRW_PREDICATE_ALIGN1_ALLV             3
#define BRW_PREDICATE_ALIGN1_ANY2H            4
#define BRW_PREDICATE_ALIGN1_ALL2H            5
#define BRW_PREDICATE_ALIGN1_ANY4H            6
#define BRW_PREDICATE_ALIGN1_ALL4H            7
#define BRW_PREDICATE_ALIGN1_ANY8H            8
#define BRW_PREDICATE_ALIGN1_ALL8H            9
#define BRW_PREDICATE_ALIGN1_ANY16H           10
#define BRW_PREDICATE_ALIGN1_ALL16H           11
#define BRW_PREDICATE_ALIGN16_REPLICATE_X     2
#define BRW_PREDICATE_ALIGN16_REPLICATE_Y     3
#define BRW_PREDICATE_ALIGN16_REPLICATE_Z     4
#define BRW_PREDICATE_ALIGN16_REPLICATE_W     5
#define BRW_PREDICATE_ALIGN16_ANY4H           6
#define BRW_PREDICATE_ALIGN16_ALL4H           7
/* Register-file codes (2-bit values). */
#define BRW_ARCHITECTURE_REGISTER_FILE    0
#define BRW_GENERAL_REGISTER_FILE         1
#define BRW_MESSAGE_REGISTER_FILE         2
#define BRW_IMMEDIATE_VALUE               3

/*
 * Register-type codes (3-bit values).
 *
 * Codes 5 and 6 each carry two names (B/VF and HF/V).  Nothing in these
 * definitions selects between them; the comments on VF and V indicate the
 * vector meanings are for immediates.
 */
#define BRW_REGISTER_TYPE_UD  0
#define BRW_REGISTER_TYPE_D   1
#define BRW_REGISTER_TYPE_UW  2
#define BRW_REGISTER_TYPE_W   3
#define BRW_REGISTER_TYPE_UB  4
#define BRW_REGISTER_TYPE_B   5
#define BRW_REGISTER_TYPE_VF  5	/* packed float vector, immediates only? */
#define BRW_REGISTER_TYPE_HF  6
#define BRW_REGISTER_TYPE_V   6	/* packed int vector, immediates only, uword dest only */
#define BRW_REGISTER_TYPE_F   7
/*
 * Architecture register file (ARF) register codes.
 *
 * Each code occupies the high nibble of a byte and leaves the low nibble
 * zero (presumably so a register index can be OR-ed in -- confirm against
 * the users of these values).
 */
#define BRW_ARF_NULL                  0x00
#define BRW_ARF_ADDRESS               0x10
#define BRW_ARF_ACCUMULATOR           0x20
#define BRW_ARF_FLAG                  0x30
#define BRW_ARF_MASK                  0x40
#define BRW_ARF_MASK_STACK            0x50
#define BRW_ARF_MASK_STACK_DEPTH      0x60
#define BRW_ARF_STATE                 0x70
#define BRW_ARF_CONTROL               0x80
#define BRW_ARF_NOTIFICATION_COUNT    0x90
#define BRW_ARF_IP                    0xA0

/* Flag bit 7 used with message register numbers. */
#define BRW_MRF_COMPR4			(1 << 7)
/* Mask-register selector codes. */
#define BRW_AMASK   0
#define BRW_IMASK   1
#define BRW_LMASK   2
#define BRW_CMASK   3

/* Thread-control codes (2-bit values). */
#define BRW_THREAD_NORMAL     0
#define BRW_THREAD_ATOMIC     1
#define BRW_THREAD_SWITCH     2

/*
 * Vertical-stride codes: stride 0 is code 0, and a power-of-two stride N
 * is code log2(N) + 1.  0xF is a separate "one dimensional" marker.
 */
#define BRW_VERTICAL_STRIDE_0                 0
#define BRW_VERTICAL_STRIDE_1                 1
#define BRW_VERTICAL_STRIDE_2                 2
#define BRW_VERTICAL_STRIDE_4                 3
#define BRW_VERTICAL_STRIDE_8                 4
#define BRW_VERTICAL_STRIDE_16                5
#define BRW_VERTICAL_STRIDE_32                6
#define BRW_VERTICAL_STRIDE_64                7
#define BRW_VERTICAL_STRIDE_128               8
#define BRW_VERTICAL_STRIDE_256               9
#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF

/* Region-width codes: a width of N is encoded as log2(N). */
#define BRW_WIDTH_1       0
#define BRW_WIDTH_2       1
#define BRW_WIDTH_4       2
#define BRW_WIDTH_8       3
#define BRW_WIDTH_16      4
/*
 * Stateless buffer boundary codes: code n names a boundary of 2^n K,
 * from 1K (code 0) up to 2M (code 11).
 */
#define BRW_STATELESS_BUFFER_BOUNDARY_1K      0
#define BRW_STATELESS_BUFFER_BOUNDARY_2K      1
#define BRW_STATELESS_BUFFER_BOUNDARY_4K      2
#define BRW_STATELESS_BUFFER_BOUNDARY_8K      3
#define BRW_STATELESS_BUFFER_BOUNDARY_16K     4
#define BRW_STATELESS_BUFFER_BOUNDARY_32K     5
#define BRW_STATELESS_BUFFER_BOUNDARY_64K     6
#define BRW_STATELESS_BUFFER_BOUNDARY_128K    7
#define BRW_STATELESS_BUFFER_BOUNDARY_256K    8
#define BRW_STATELESS_BUFFER_BOUNDARY_512K    9
#define BRW_STATELESS_BUFFER_BOUNDARY_1M      10
#define BRW_STATELESS_BUFFER_BOUNDARY_2M      11

/* Polygon facing codes. */
#define BRW_POLYGON_FACING_FRONT      0
#define BRW_POLYGON_FACING_BACK       1
/* Message-target codes in macro form. */
#define BRW_MESSAGE_TARGET_NULL               0
#define BRW_MESSAGE_TARGET_MATH               1 /* reserved on GEN6 */
#define BRW_MESSAGE_TARGET_SAMPLER            2
#define BRW_MESSAGE_TARGET_GATEWAY            3
#define BRW_MESSAGE_TARGET_DATAPORT_READ      4
#define BRW_MESSAGE_TARGET_DATAPORT_WRITE     5
#define BRW_MESSAGE_TARGET_URB                6
#define BRW_MESSAGE_TARGET_THREAD_SPAWNER     7

/*
 * GEN6 names.  Codes 4 and 5 reuse the values of DATAPORT_READ and
 * DATAPORT_WRITE above; code 9 has no BRW_ counterpart.
 */
#define GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE  4
#define GEN6_MESSAGE_TARGET_DP_RENDER_CACHE   5
#define GEN6_MESSAGE_TARGET_DP_CONST_CACHE    9
/* Sampler return-format codes (value 1 is not defined here). */
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32     0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32      2
#define BRW_SAMPLER_RETURN_FORMAT_SINT32      3

/*
 * Sampler message-type codes, BRW_ naming.
 *
 * Many names share a numeric code, so a code is only meaningful together
 * with the SIMD mode given in the name.
 */
#define BRW_SAMPLER_MESSAGE_SAMPLE	              0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE              0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE             0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS        0
#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX             1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD        1
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD         1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS  2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS    2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE    0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE     2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE  1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3
#define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD                 3

/* Sampler message-type codes, GEN5 naming: one distinct code per message. */
#define GEN5_SAMPLER_MESSAGE_SAMPLE              0
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS         1
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD          2
#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE      3
#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS       4
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE  6
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD           7
#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO      10

/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2                   0
#define BRW_SAMPLER_SIMD_MODE_SIMD8                     1
#define BRW_SAMPLER_SIMD_MODE_SIMD16                    2
#define BRW_SAMPLER_SIMD_MODE_SIMD32_64                 3
/* Dataport OWORD block size codes. */
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS     3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS     4

/* Dataport OWORD dual-block size codes (value 1 is not defined here). */
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD     0
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS    2

/* Dataport DWORD scattered block size codes. */
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS   2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS  3

/*
 * Dataport read message-type codes.  The numbering differs between the
 * GEN4, G45/GEN5 and GEN6 name families, so pick the family that matches
 * the target hardware.
 */
/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ          0
/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     1
#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      3
/* G45, GEN5 */
#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ	    1
#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     2
#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ	    3
#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          4
#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      6
/* GEN6 */
#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ	    1
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     2
#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          4
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ  5
#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      6

/* Dataport read target-cache codes. */
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE      0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE    1
#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE   2

/* Render-target write message-control codes. */
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE                0
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED     1
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01         2
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23         3
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01       4
/**
 * Message target: Shared Function ID for where to SEND a message.
 *
 * These are enumerated in the ISA reference under "send - Send Message".
 * In particular, see the following tables:
 * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition"
 * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor"
 * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) /
 *   Overview / GPE Function IDs
 *
 * Note that enumerators are not unique by value: the GEN6 sampler-cache
 * and render-cache IDs equal BRW_SFID_DATAPORT_READ and
 * BRW_SFID_DATAPORT_WRITE respectively, so a switch over this enum cannot
 * list both spellings as separate cases.
 */
enum brw_message_target {
   BRW_SFID_NULL                     = 0,
   BRW_SFID_MATH                     = 1, /* Only valid on Gen4-5 */
   BRW_SFID_SAMPLER                  = 2,
   BRW_SFID_MESSAGE_GATEWAY          = 3,
   BRW_SFID_DATAPORT_READ            = 4,
   BRW_SFID_DATAPORT_WRITE           = 5,
   BRW_SFID_URB                      = 6,
   BRW_SFID_THREAD_SPAWNER           = 7,

   GEN6_SFID_DATAPORT_SAMPLER_CACHE  = 4,
   GEN6_SFID_DATAPORT_RENDER_CACHE   = 5,
   GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,

   GEN7_SFID_DATAPORT_DATA_CACHE     = 10,
};
/* Macro form of the Gen7 data-cache message target (value 10). */
#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE     10

/* Dataport write message-type codes (value 6 is not defined here). */
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE                0
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE           1
#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE                2
#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE            3
#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE              4
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE     5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE               7

/*
 * GEN6 numbering.  Code 7 means DWORD_ATOMIC_WRITE here, whereas the BRW_
 * list above uses 7 for FLUSH_RENDER_CACHE.
 */
/* GEN6 */
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE              7
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE               8
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE          9
#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE               10
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE           11
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE             12
#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE               13
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE       14
/*
 * Math function codes.  Code 9 is shared: it is TAN under the gen4 name
 * and FDIV under the gen6+ name.
 */
#define BRW_MATH_FUNCTION_INV                              1
#define BRW_MATH_FUNCTION_LOG                              2
#define BRW_MATH_FUNCTION_EXP                              3
#define BRW_MATH_FUNCTION_SQRT                             4
#define BRW_MATH_FUNCTION_RSQ                              5
#define BRW_MATH_FUNCTION_SIN                              6 /* was 7 */
#define BRW_MATH_FUNCTION_COS                              7 /* was 8 */
#define BRW_MATH_FUNCTION_SINCOS                           8 /* was 6 */
#define BRW_MATH_FUNCTION_TAN                              9 /* gen4 */
#define BRW_MATH_FUNCTION_FDIV                             9 /* gen6+ */
#define BRW_MATH_FUNCTION_POW                              10
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER   11
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER                13

/* Single-bit options accompanying a math function. */
#define BRW_MATH_INTEGER_UNSIGNED     0
#define BRW_MATH_INTEGER_SIGNED       1

#define BRW_MATH_PRECISION_FULL        0
#define BRW_MATH_PRECISION_PARTIAL     1

#define BRW_MATH_SATURATE_NONE         0
#define BRW_MATH_SATURATE_SATURATE     1

#define BRW_MATH_DATA_VECTOR  0
#define BRW_MATH_DATA_SCALAR  1
/* URB message opcode. */
#define BRW_URB_OPCODE_WRITE  0

/* URB swizzle-control codes. */
#define BRW_URB_SWIZZLE_NONE          0
#define BRW_URB_SWIZZLE_INTERLEAVE    1
#define BRW_URB_SWIZZLE_TRANSPOSE     2

/*
 * Scratch space size codes: code n names a size of 2^n K, from 1K
 * (code 0) up to 2M (code 11).
 */
#define BRW_SCRATCH_SPACE_SIZE_1K     0
#define BRW_SCRATCH_SPACE_SIZE_2K     1
#define BRW_SCRATCH_SPACE_SIZE_4K     2
#define BRW_SCRATCH_SPACE_SIZE_8K     3
#define BRW_SCRATCH_SPACE_SIZE_16K    4
#define BRW_SCRATCH_SPACE_SIZE_32K    5
#define BRW_SCRATCH_SPACE_SIZE_64K    6
#define BRW_SCRATCH_SPACE_SIZE_128K   7
#define BRW_SCRATCH_SPACE_SIZE_256K   8
#define BRW_SCRATCH_SPACE_SIZE_512K   9
#define BRW_SCRATCH_SPACE_SIZE_1M     10
#define BRW_SCRATCH_SPACE_SIZE_2M     11

/* Register size, 8 * 4 = 32 (presumably bytes: eight 4-byte channels -- confirm). */
#define REG_SIZE (8*4)
522 struct brw_instruction {
523 	struct {
524 		unsigned opcode:7;
525 		unsigned pad:1;
526 		unsigned access_mode:1;
527 		unsigned mask_control:1;
528 		unsigned dependency_control:2;
529 		unsigned compression_control:2; /* gen6: quater control */
530 		unsigned thread_control:2;
531 		unsigned predicate_control:4;
532 		unsigned predicate_inverse:1;
533 		unsigned execution_size:3;
534 		/**
535 		 * Conditional Modifier for most instructions.  On Gen6+, this is also
536 		 * used for the SEND instruction's Message Target/SFID.
537 		 */
538 		unsigned destreg__conditionalmod:4;
539 		unsigned acc_wr_control:1;
540 		unsigned cmpt_control:1;
541 		unsigned debug_control:1;
542 		unsigned saturate:1;
543 	} header;
544 
545 	union {
546 		struct {
547 			unsigned dest_reg_file:2;
548 			unsigned dest_reg_type:3;
549 			unsigned src0_reg_file:2;
550 			unsigned src0_reg_type:3;
551 			unsigned src1_reg_file:2;
552 			unsigned src1_reg_type:3;
553 			unsigned pad:1;
554 			unsigned dest_subreg_nr:5;
555 			unsigned dest_reg_nr:8;
556 			unsigned dest_horiz_stride:2;
557 			unsigned dest_address_mode:1;
558 		} da1;
559 
560 		struct {
561 			unsigned dest_reg_file:2;
562 			unsigned dest_reg_type:3;
563 			unsigned src0_reg_file:2;
564 			unsigned src0_reg_type:3;
565 			unsigned src1_reg_file:2;        /* 0x00000c00 */
566 			unsigned src1_reg_type:3;        /* 0x00007000 */
567 			unsigned pad:1;
568 			int dest_indirect_offset:10;	/* offset against the deref'd address reg */
569 			unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */
570 			unsigned dest_horiz_stride:2;
571 			unsigned dest_address_mode:1;
572 		} ia1;
573 
574 		struct {
575 			unsigned dest_reg_file:2;
576 			unsigned dest_reg_type:3;
577 			unsigned src0_reg_file:2;
578 			unsigned src0_reg_type:3;
579 			unsigned src1_reg_file:2;
580 			unsigned src1_reg_type:3;
581 			unsigned pad:1;
582 			unsigned dest_writemask:4;
583 			unsigned dest_subreg_nr:1;
584 			unsigned dest_reg_nr:8;
585 			unsigned dest_horiz_stride:2;
586 			unsigned dest_address_mode:1;
587 		} da16;
588 
589 		struct {
590 			unsigned dest_reg_file:2;
591 			unsigned dest_reg_type:3;
592 			unsigned src0_reg_file:2;
593 			unsigned src0_reg_type:3;
594 			unsigned pad0:6;
595 			unsigned dest_writemask:4;
596 			int dest_indirect_offset:6;
597 			unsigned dest_subreg_nr:3;
598 			unsigned dest_horiz_stride:2;
599 			unsigned dest_address_mode:1;
600 		} ia16;
601 
602 		struct {
603 			unsigned dest_reg_file:2;
604 			unsigned dest_reg_type:3;
605 			unsigned src0_reg_file:2;
606 			unsigned src0_reg_type:3;
607 			unsigned src1_reg_file:2;
608 			unsigned src1_reg_type:3;
609 			unsigned pad:1;
610 
611 			int jump_count:16;
612 		} branch_gen6;
613 
614 		struct {
615 			unsigned dest_reg_file:1;
616 			unsigned flag_subreg_num:1;
617 			unsigned pad0:2;
618 			unsigned src0_abs:1;
619 			unsigned src0_negate:1;
620 			unsigned src1_abs:1;
621 			unsigned src1_negate:1;
622 			unsigned src2_abs:1;
623 			unsigned src2_negate:1;
624 			unsigned pad1:7;
625 			unsigned dest_writemask:4;
626 			unsigned dest_subreg_nr:3;
627 			unsigned dest_reg_nr:8;
628 		} da3src;
629 	} bits1;
630 
631 
632 	union {
633 		struct {
634 			unsigned src0_subreg_nr:5;
635 			unsigned src0_reg_nr:8;
636 			unsigned src0_abs:1;
637 			unsigned src0_negate:1;
638 			unsigned src0_address_mode:1;
639 			unsigned src0_horiz_stride:2;
640 			unsigned src0_width:3;
641 			unsigned src0_vert_stride:4;
642 			unsigned flag_subreg_nr:1;
643 			unsigned flag_reg_nr:1;
644 			unsigned pad:5;
645 		} da1;
646 
647 		struct {
648 			int src0_indirect_offset:10;
649 			unsigned src0_subreg_nr:3;
650 			unsigned src0_abs:1;
651 			unsigned src0_negate:1;
652 			unsigned src0_address_mode:1;
653 			unsigned src0_horiz_stride:2;
654 			unsigned src0_width:3;
655 			unsigned src0_vert_stride:4;
656 			unsigned flag_subreg_nr:1;
657 			unsigned flag_reg_nr:1;
658 			unsigned pad:5;
659 		} ia1;
660 
661 		struct {
662 			unsigned src0_swz_x:2;
663 			unsigned src0_swz_y:2;
664 			unsigned src0_subreg_nr:1;
665 			unsigned src0_reg_nr:8;
666 			unsigned src0_abs:1;
667 			unsigned src0_negate:1;
668 			unsigned src0_address_mode:1;
669 			unsigned src0_swz_z:2;
670 			unsigned src0_swz_w:2;
671 			unsigned pad0:1;
672 			unsigned src0_vert_stride:4;
673 			unsigned flag_subreg_nr:1;
674 			unsigned flag_reg_nr:1;
675 			unsigned pad1:5;
676 		} da16;
677 
678 		struct {
679 			unsigned src0_swz_x:2;
680 			unsigned src0_swz_y:2;
681 			int src0_indirect_offset:6;
682 			unsigned src0_subreg_nr:3;
683 			unsigned src0_abs:1;
684 			unsigned src0_negate:1;
685 			unsigned src0_address_mode:1;
686 			unsigned src0_swz_z:2;
687 			unsigned src0_swz_w:2;
688 			unsigned pad0:1;
689 			unsigned src0_vert_stride:4;
690 			unsigned flag_subreg_nr:1;
691 			unsigned flag_reg_nr:1;
692 			unsigned pad1:5;
693 		} ia16;
694 
695 		/* Extended Message Descriptor for Ironlake (Gen5) SEND instruction.
696 		 *
697 		 * Does not apply to Gen6+.  The SFID/message target moved to bits
698 		 * 27:24 of the header (destreg__conditionalmod); EOT is in bits3.
699 		 */
700 		struct {
701 			unsigned pad:26;
702 			unsigned end_of_thread:1;
703 			unsigned pad1:1;
704 			unsigned sfid:4;
705 		} send_gen5;  /* for Ironlake only */
706 
707 		struct {
708 			unsigned src0_rep_ctrl:1;
709 			unsigned src0_swizzle:8;
710 			unsigned src0_subreg_nr:3;
711 			unsigned src0_reg_nr:8;
712 			unsigned pad0:1;
713 			unsigned src1_rep_ctrl:1;
714 			unsigned src1_swizzle:8;
715 			unsigned src1_subreg_nr_low:2;
716 		} da3src;
717 	} bits2;
718 
719 	union {
720 		struct {
721 			unsigned src1_subreg_nr:5;
722 			unsigned src1_reg_nr:8;
723 			unsigned src1_abs:1;
724 			unsigned src1_negate:1;
725 			unsigned src1_address_mode:1;
726 			unsigned src1_horiz_stride:2;
727 			unsigned src1_width:3;
728 			unsigned src1_vert_stride:4;
729 			unsigned pad0:7;
730 		} da1;
731 
732 		struct {
733 			unsigned src1_swz_x:2;
734 			unsigned src1_swz_y:2;
735 			unsigned src1_subreg_nr:1;
736 			unsigned src1_reg_nr:8;
737 			unsigned src1_abs:1;
738 			unsigned src1_negate:1;
739 			unsigned src1_address_mode:1;
740 			unsigned src1_swz_z:2;
741 			unsigned src1_swz_w:2;
742 			unsigned pad1:1;
743 			unsigned src1_vert_stride:4;
744 			unsigned pad2:7;
745 		} da16;
746 
747 		struct {
748 			int src1_indirect_offset:10;
749 			unsigned src1_subreg_nr:3;
750 			unsigned src1_abs:1;
751 			unsigned src1_negate:1;
752 			unsigned src1_address_mode:1;
753 			unsigned src1_horiz_stride:2;
754 			unsigned src1_width:3;
755 			unsigned src1_vert_stride:4;
756 			unsigned flag_subreg_nr:1;
757 			unsigned flag_reg_nr:1;
758 			unsigned pad1:5;
759 		} ia1;
760 
761 		struct {
762 			unsigned src1_swz_x:2;
763 			unsigned src1_swz_y:2;
764 			int  src1_indirect_offset:6;
765 			unsigned src1_subreg_nr:3;
766 			unsigned src1_abs:1;
767 			unsigned src1_negate:1;
768 			unsigned pad0:1;
769 			unsigned src1_swz_z:2;
770 			unsigned src1_swz_w:2;
771 			unsigned pad1:1;
772 			unsigned src1_vert_stride:4;
773 			unsigned flag_subreg_nr:1;
774 			unsigned flag_reg_nr:1;
775 			unsigned pad2:5;
776 		} ia16;
777 
778 		struct {
779 			int jump_count:16;	/* note: signed */
780 			unsigned pop_count:4;
781 			unsigned pad0:12;
782 		} if_else;
783 
784 		/* This is also used for gen7 IF/ELSE instructions */
785 		struct {
786 			/* Signed jump distance to the ip to jump to if all channels
787 			 * are disabled after the break or continue.  It should point
788 			 * to the end of the innermost control flow block, as that's
789 			 * where some channel could get re-enabled.
790 			 */
791 			int jip:16;
792 
793 			/* Signed jump distance to the location to resume execution
794 			 * of this channel if it's enabled for the break or continue.
795 			 */
796 			int uip:16;
797 		} break_cont;
798 
799 		/**
800 		 * \defgroup SEND instructions / Message Descriptors
801 		 *
802 		 * @{
803 		 */
804 
805 		/**
806 		 * Generic Message Descriptor for Gen4 SEND instructions.  The structs
807 		 * below expand function_control to something specific for their
808 		 * message.  Due to struct packing issues, they duplicate these bits.
809 		 *
810 		 * See the G45 PRM, Volume 4, Table 14-15.
811 		 */
812 		struct {
813 			unsigned function_control:16;
814 			unsigned response_length:4;
815 			unsigned msg_length:4;
816 			unsigned msg_target:4;
817 			unsigned pad1:3;
818 			unsigned end_of_thread:1;
819 		} generic;
820 
821 		/**
822 		 * Generic Message Descriptor for Gen5-7 SEND instructions.
823 		 *
824 		 * See the Sandybridge PRM, Volume 2 Part 2, Table 8-15.  (Sadly, most
825 		 * of the information on the SEND instruction is missing from the public
826 		 * Ironlake PRM.)
827 		 *
828 		 * The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies.
829 		 * According to the SEND instruction description:
830 		 * "The MSb of the message description, the EOT field, always comes from
831 		 *  bit 127 of the instruction word"...which is bit 31 of this field.
832 		 */
833 		struct {
834 			unsigned function_control:19;
835 			unsigned header_present:1;
836 			unsigned response_length:5;
837 			unsigned msg_length:4;
838 			unsigned pad1:2;
839 			unsigned end_of_thread:1;
840 		} generic_gen5;
841 
842 		/** G45 PRM, Volume 4, Section 6.1.1.1 */
843 		struct {
844 			unsigned function:4;
845 			unsigned int_type:1;
846 			unsigned precision:1;
847 			unsigned saturate:1;
848 			unsigned data_type:1;
849 			unsigned pad0:8;
850 			unsigned response_length:4;
851 			unsigned msg_length:4;
852 			unsigned msg_target:4;
853 			unsigned pad1:3;
854 			unsigned end_of_thread:1;
855 		} math;
856 
857 		/** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
858 		struct {
859 			unsigned function:4;
860 			unsigned int_type:1;
861 			unsigned precision:1;
862 			unsigned saturate:1;
863 			unsigned data_type:1;
864 			unsigned snapshot:1;
865 			unsigned pad0:10;
866 			unsigned header_present:1;
867 			unsigned response_length:5;
868 			unsigned msg_length:4;
869 			unsigned pad1:2;
870 			unsigned end_of_thread:1;
871 		} math_gen5;
872 
873 		/** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */
874 		struct {
875 			unsigned binding_table_index:8;
876 			unsigned sampler:4;
877 			unsigned return_format:2;
878 			unsigned msg_type:2;
879 			unsigned response_length:4;
880 			unsigned msg_length:4;
881 			unsigned msg_target:4;
882 			unsigned pad1:3;
883 			unsigned end_of_thread:1;
884 		} sampler;
885 
886 		/** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */
887 		struct {
888 			unsigned binding_table_index:8;
889 			unsigned sampler:4;
890 			unsigned msg_type:4;
891 			unsigned response_length:4;
892 			unsigned msg_length:4;
893 			unsigned msg_target:4;
894 			unsigned pad1:3;
895 			unsigned end_of_thread:1;
896 		} sampler_g4x;
897 
898 		/** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */
899 		struct {
900 			unsigned binding_table_index:8;
901 			unsigned sampler:4;
902 			unsigned msg_type:4;
903 			unsigned simd_mode:2;
904 			unsigned pad0:1;
905 			unsigned header_present:1;
906 			unsigned response_length:5;
907 			unsigned msg_length:4;
908 			unsigned pad1:2;
909 			unsigned end_of_thread:1;
910 		} sampler_gen5;
911 
912 		struct {
913 			unsigned binding_table_index:8;
914 			unsigned sampler:4;
915 			unsigned msg_type:5;
916 			unsigned simd_mode:2;
917 			unsigned header_present:1;
918 			unsigned response_length:5;
919 			unsigned msg_length:4;
920 			unsigned pad1:2;
921 			unsigned end_of_thread:1;
922 		} sampler_gen7;
923 
924 		struct brw_urb_immediate {
925 			unsigned opcode:4;
926 			unsigned offset:6;
927 			unsigned swizzle_control:2;
928 			unsigned pad:1;
929 			unsigned allocate:1;
930 			unsigned used:1;
931 			unsigned complete:1;
932 			unsigned response_length:4;
933 			unsigned msg_length:4;
934 			unsigned msg_target:4;
935 			unsigned pad1:3;
936 			unsigned end_of_thread:1;
937 		} urb;
938 
939 		struct {
940 			unsigned opcode:4;
941 			unsigned offset:6;
942 			unsigned swizzle_control:2;
943 			unsigned pad:1;
944 			unsigned allocate:1;
945 			unsigned used:1;
946 			unsigned complete:1;
947 			unsigned pad0:3;
948 			unsigned header_present:1;
949 			unsigned response_length:5;
950 			unsigned msg_length:4;
951 			unsigned pad1:2;
952 			unsigned end_of_thread:1;
953 		} urb_gen5;
954 
955 		struct {
956 			unsigned opcode:3;
957 			unsigned offset:11;
958 			unsigned swizzle_control:1;
959 			unsigned complete:1;
960 			unsigned per_slot_offset:1;
961 			unsigned pad0:2;
962 			unsigned header_present:1;
963 			unsigned response_length:5;
964 			unsigned msg_length:4;
965 			unsigned pad1:2;
966 			unsigned end_of_thread:1;
967 		} urb_gen7;
968 
969 		/** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */
970 		struct {
971 			unsigned binding_table_index:8;
972 			unsigned msg_control:4;
973 			unsigned msg_type:2;
974 			unsigned target_cache:2;
975 			unsigned response_length:4;
976 			unsigned msg_length:4;
977 			unsigned msg_target:4;
978 			unsigned pad1:3;
979 			unsigned end_of_thread:1;
980 		} dp_read;
981 
982 		/** G45 PRM, Volume 4, Section 5.10.1.1.2 */
983 		struct {
984 			unsigned binding_table_index:8;
985 			unsigned msg_control:3;
986 			unsigned msg_type:3;
987 			unsigned target_cache:2;
988 			unsigned response_length:4;
989 			unsigned msg_length:4;
990 			unsigned msg_target:4;
991 			unsigned pad1:3;
992 			unsigned end_of_thread:1;
993 		} dp_read_g4x;
994 
995 		/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
996 		struct {
997 			unsigned binding_table_index:8;
998 			unsigned msg_control:3;
999 			unsigned msg_type:3;
1000 			unsigned target_cache:2;
1001 			unsigned pad0:3;
1002 			unsigned header_present:1;
1003 			unsigned response_length:5;
1004 			unsigned msg_length:4;
1005 			unsigned pad1:2;
1006 			unsigned end_of_thread:1;
1007 		} dp_read_gen5;
1008 
1009 		/** G45 PRM, Volume 4, Section 5.10.1.1.2.  For both Gen4 and G45. */
1010 		struct {
1011 			unsigned binding_table_index:8;
1012 			unsigned msg_control:3;
1013 			unsigned last_render_target:1;
1014 			unsigned msg_type:3;
1015 			unsigned send_commit_msg:1;
1016 			unsigned response_length:4;
1017 			unsigned msg_length:4;
1018 			unsigned msg_target:4;
1019 			unsigned pad1:3;
1020 			unsigned end_of_thread:1;
1021 		} dp_write;
1022 
1023 		/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
1024 		struct {
1025 			unsigned binding_table_index:8;
1026 			unsigned msg_control:3;
1027 			unsigned last_render_target:1;
1028 			unsigned msg_type:3;
1029 			unsigned send_commit_msg:1;
1030 			unsigned pad0:3;
1031 			unsigned header_present:1;
1032 			unsigned response_length:5;
1033 			unsigned msg_length:4;
1034 			unsigned pad1:2;
1035 			unsigned end_of_thread:1;
1036 		} dp_write_gen5;
1037 
1038 		/**
1039 		 * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
1040 		 *
1041 		 * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
1042 		 **/
1043 		struct {
1044 			unsigned binding_table_index:8;
1045 			unsigned msg_control:5;
1046 			unsigned msg_type:3;
1047 			unsigned pad0:3;
1048 			unsigned header_present:1;
1049 			unsigned response_length:5;
1050 			unsigned msg_length:4;
1051 			unsigned pad1:2;
1052 			unsigned end_of_thread:1;
1053 		} gen6_dp_sampler_const_cache;
1054 
1055 		/**
1056 		 * Message for the Sandybridge Render Cache Data Port.
1057 		 *
1058 		 * Most fields are defined in the Sandybridge PRM, Volume 4 Part 1,
1059 		 * Section 3.9.2.1.1: Message Descriptor.
1060 		 *
1061 		 * "Slot Group Select" and "Last Render Target" are part of the
1062 		 * 5-bit message control for Render Target Write messages.  See
1063 		 * Section 3.9.9.2.1 of the same volume.
1064 		 */
1065 		struct {
1066 			unsigned binding_table_index:8;
1067 			unsigned msg_control:3;
1068 			unsigned slot_group_select:1;
1069 			unsigned last_render_target:1;
1070 			unsigned msg_type:4;
1071 			unsigned send_commit_msg:1;
1072 			unsigned pad0:1;
1073 			unsigned header_present:1;
1074 			unsigned response_length:5;
1075 			unsigned msg_length:4;
1076 			unsigned pad1:2;
1077 			unsigned end_of_thread:1;
1078 		} gen6_dp;
1079 
1080 		/**
1081 		 * Message for any of the Gen7 Data Port caches.
1082 		 *
1083 		 * Most fields are defined in BSpec volume 5c.2 Data Port / Messages /
1084 		 * Data Port Messages / Message Descriptor.  Once again, "Slot Group
1085 		 * Select" and "Last Render Target" are part of the 6-bit message
1086 		 * control for Render Target Writes.
1087 		 */
1088 		struct {
1089 			unsigned binding_table_index:8;
1090 			unsigned msg_control:3;
1091 			unsigned slot_group_select:1;
1092 			unsigned last_render_target:1;
1093 			unsigned msg_control_pad:1;
1094 			unsigned msg_type:4;
1095 			unsigned pad1:1;
1096 			unsigned header_present:1;
1097 			unsigned response_length:5;
1098 			unsigned msg_length:4;
1099 			unsigned pad2:2;
1100 			unsigned end_of_thread:1;
1101 		} gen7_dp;
1102 		/** @} */
1103 
1104 		struct {
1105 			unsigned src1_subreg_nr_high:1;
1106 			unsigned src1_reg_nr:8;
1107 			unsigned pad0:1;
1108 			unsigned src2_rep_ctrl:1;
1109 			unsigned src2_swizzle:8;
1110 			unsigned src2_subreg_nr:3;
1111 			unsigned src2_reg_nr:8;
1112 			unsigned pad1:2;
1113 		} da3src;
1114 
1115 		int d;
1116 		unsigned ud;
1117 		float f;
1118 	} bits3;
1119 };
1120 
1121 
1122 /* These aren't hardware structs, just something useful for us to pass around:
1123  *
1124  * Align1 operation has a lot of control over input ranges.  Used in
1125  * WM programs to implement shaders decomposed into "channel serial"
1126  * or "structure of array" form:
1127  */
struct brw_reg {
	unsigned type:4;		/* BRW_REGISTER_TYPE_x */
	unsigned file:2;		/* BRW_x_REGISTER_FILE */
	unsigned nr:8;			/* register number/index */
	unsigned subnr:5;		/* :1 in align16; brw_reg() stores a byte offset here */
	unsigned negate:1;		/* source only */
	unsigned abs:1;		/* source only */
	unsigned vstride:4;		/* source only */
	unsigned width:3;		/* src only, align1 only */
	unsigned hstride:2;   		/* align1 only */
	unsigned address_mode:1;	/* relative addressing, hopefully! */
	unsigned pad0:1;		/* zeroed by brw_reg() */

	/* Second dword: either the align16/indirect control bits of a real
	 * register, or the raw payload written by the brw_imm_*() helpers.
	 */
	union {
		struct {
			unsigned swizzle:8;		/* src only, align16 only */
			unsigned writemask:4;		/* dest only, align16 only */
			int  indirect_offset:10;	/* relative addressing offset */
			unsigned pad1:10;		/* two dwords total */
		} bits;

		float f;
		int   d;
		unsigned ud;
	} dw1;
};
1154 
/* Handle for register-indirect access: an address subregister plus a
 * signed offset.  The deref_*() helpers turn it into a brw_reg and
 * get_addr_reg() returns the address register it names.
 */
struct brw_indirect {
	unsigned addr_subnr:4;	/* passed to brw_address_reg() by get_addr_reg() */
	int addr_offset:10;	/* added to the per-access offset in deref_*() */
	unsigned pad:18;	/* zeroed by brw_indirect() */
};
1160 
/* Depth of the saved-state arrays in struct brw_compile. */
#define BRW_EU_MAX_INSN_STACK 5
/* Instruction-count limit asserted by brw_next_insn(). */
#define BRW_EU_MAX_INSN 10000
1163 
/* State of one instruction-emission session. */
struct brw_compile {
	struct brw_instruction *store;	/* instruction array, indexed by nr_insn */
	unsigned nr_insn;		/* number of instructions emitted so far */

	int gen;	/* hardware generation; compared against octal constants such as 060 */

	/* Allow clients to push/pop instruction state:
	*/
	struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
	bool compressed_stack[BRW_EU_MAX_INSN_STACK];
	/* Default state that brw_next_insn() copies into every new instruction. */
	struct brw_instruction *current;

	unsigned flag_value;
	bool single_program_flow;
	bool compressed;

	/* Control flow stacks:
	 * - if_stack contains IF and ELSE instructions which must be patched
	 *   (and popped) once the matching ENDIF instruction is encountered.
	 */
	struct brw_instruction **if_stack;
	int if_stack_depth;
	int if_stack_array_size;
};
1188 
type_sz(unsigned type)1189 static inline int type_sz(unsigned type)
1190 {
1191 	switch (type) {
1192 	case BRW_REGISTER_TYPE_UD:
1193 	case BRW_REGISTER_TYPE_D:
1194 	case BRW_REGISTER_TYPE_F:
1195 		return 4;
1196 	case BRW_REGISTER_TYPE_HF:
1197 	case BRW_REGISTER_TYPE_UW:
1198 	case BRW_REGISTER_TYPE_W:
1199 		return 2;
1200 	case BRW_REGISTER_TYPE_UB:
1201 	case BRW_REGISTER_TYPE_B:
1202 		return 1;
1203 	default:
1204 		return 0;
1205 	}
1206 }
1207 
1208 /**
1209  * Construct a brw_reg.
1210  * \param file  one of the BRW_x_REGISTER_FILE values
1211  * \param nr  register number/index
1212  * \param subnr  register sub number
1213  * \param type  one of BRW_REGISTER_TYPE_x
1214  * \param vstride  one of BRW_VERTICAL_STRIDE_x
1215  * \param width  one of BRW_WIDTH_x
1216  * \param hstride  one of BRW_HORIZONTAL_STRIDE_x
1217  * \param swizzle  one of BRW_SWIZZLE_x
1218  * \param writemask  WRITEMASK_X/Y/Z/W bitfield
1219  */
brw_reg(unsigned file,unsigned nr,unsigned subnr,unsigned type,unsigned vstride,unsigned width,unsigned hstride,unsigned swizzle,unsigned writemask)1220 static inline struct brw_reg brw_reg(unsigned file,
1221 				     unsigned nr,
1222 				     unsigned subnr,
1223 				     unsigned type,
1224 				     unsigned vstride,
1225 				     unsigned width,
1226 				     unsigned hstride,
1227 				     unsigned swizzle,
1228 				     unsigned writemask)
1229 {
1230 	struct brw_reg reg;
1231 	if (file == BRW_GENERAL_REGISTER_FILE)
1232 		assert(nr < BRW_MAX_GRF);
1233 	else if (file == BRW_MESSAGE_REGISTER_FILE)
1234 		assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
1235 	else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
1236 		assert(nr <= BRW_ARF_IP);
1237 
1238 	reg.type = type;
1239 	reg.file = file;
1240 	reg.nr = nr;
1241 	reg.subnr = subnr * type_sz(type);
1242 	reg.negate = 0;
1243 	reg.abs = 0;
1244 	reg.vstride = vstride;
1245 	reg.width = width;
1246 	reg.hstride = hstride;
1247 	reg.address_mode = BRW_ADDRESS_DIRECT;
1248 	reg.pad0 = 0;
1249 
1250 	/* Could do better: If the reg is r5.3<0;1,0>, we probably want to
1251 	 * set swizzle and writemask to W, as the lower bits of subnr will
1252 	 * be lost when converted to align16.  This is probably too much to
1253 	 * keep track of as you'd want it adjusted by suboffset(), etc.
1254 	 * Perhaps fix up when converting to align16?
1255 	 */
1256 	reg.dw1.bits.swizzle = swizzle;
1257 	reg.dw1.bits.writemask = writemask;
1258 	reg.dw1.bits.indirect_offset = 0;
1259 	reg.dw1.bits.pad1 = 0;
1260 	return reg;
1261 }
1262 
1263 /** Construct float[16] register */
brw_vec16_reg(unsigned file,unsigned nr,unsigned subnr)1264 static inline struct brw_reg brw_vec16_reg(unsigned file,
1265 					   unsigned nr,
1266 					   unsigned subnr)
1267 {
1268 	return brw_reg(file,
1269 		       nr,
1270 		       subnr,
1271 		       BRW_REGISTER_TYPE_F,
1272 		       BRW_VERTICAL_STRIDE_16,
1273 		       BRW_WIDTH_16,
1274 		       BRW_HORIZONTAL_STRIDE_1,
1275 		       BRW_SWIZZLE_XYZW,
1276 		       WRITEMASK_XYZW);
1277 }
1278 
1279 /** Construct float[8] register */
brw_vec8_reg(unsigned file,unsigned nr,unsigned subnr)1280 static inline struct brw_reg brw_vec8_reg(unsigned file,
1281 					  unsigned nr,
1282 					  unsigned subnr)
1283 {
1284 	return brw_reg(file,
1285 		       nr,
1286 		       subnr,
1287 		       BRW_REGISTER_TYPE_F,
1288 		       BRW_VERTICAL_STRIDE_8,
1289 		       BRW_WIDTH_8,
1290 		       BRW_HORIZONTAL_STRIDE_1,
1291 		       BRW_SWIZZLE_XYZW,
1292 		       WRITEMASK_XYZW);
1293 }
1294 
1295 /** Construct float[4] register */
brw_vec4_reg(unsigned file,unsigned nr,unsigned subnr)1296 static inline struct brw_reg brw_vec4_reg(unsigned file,
1297 					  unsigned nr,
1298 					  unsigned subnr)
1299 {
1300 	return brw_reg(file,
1301 		       nr,
1302 		       subnr,
1303 		       BRW_REGISTER_TYPE_F,
1304 		       BRW_VERTICAL_STRIDE_4,
1305 		       BRW_WIDTH_4,
1306 		       BRW_HORIZONTAL_STRIDE_1,
1307 		       BRW_SWIZZLE_XYZW,
1308 		       WRITEMASK_XYZW);
1309 }
1310 
1311 /** Construct float[2] register */
brw_vec2_reg(unsigned file,unsigned nr,unsigned subnr)1312 static inline struct brw_reg brw_vec2_reg(unsigned file,
1313 					  unsigned nr,
1314 					  unsigned subnr)
1315 {
1316 	return brw_reg(file,
1317 		       nr,
1318 		       subnr,
1319 		       BRW_REGISTER_TYPE_F,
1320 		       BRW_VERTICAL_STRIDE_2,
1321 		       BRW_WIDTH_2,
1322 		       BRW_HORIZONTAL_STRIDE_1,
1323 		       BRW_SWIZZLE_XYXY,
1324 		       WRITEMASK_XY);
1325 }
1326 
1327 /** Construct float[1] register */
brw_vec1_reg(unsigned file,unsigned nr,unsigned subnr)1328 static inline struct brw_reg brw_vec1_reg(unsigned file,
1329 					  unsigned nr,
1330 					  unsigned subnr)
1331 {
1332 	return brw_reg(file,
1333 		       nr,
1334 		       subnr,
1335 		       BRW_REGISTER_TYPE_F,
1336 		       BRW_VERTICAL_STRIDE_0,
1337 		       BRW_WIDTH_1,
1338 		       BRW_HORIZONTAL_STRIDE_0,
1339 		       BRW_SWIZZLE_XXXX,
1340 		       WRITEMASK_X);
1341 }
1342 
1343 
__retype(struct brw_reg reg,unsigned type)1344 static inline struct brw_reg __retype(struct brw_reg reg,
1345 				      unsigned type)
1346 {
1347 	reg.type = type;
1348 	return reg;
1349 }
1350 
__retype_d(struct brw_reg reg)1351 static inline struct brw_reg __retype_d(struct brw_reg reg)
1352 {
1353 	return __retype(reg, BRW_REGISTER_TYPE_D);
1354 }
1355 
__retype_ud(struct brw_reg reg)1356 static inline struct brw_reg __retype_ud(struct brw_reg reg)
1357 {
1358 	return __retype(reg, BRW_REGISTER_TYPE_UD);
1359 }
1360 
__retype_uw(struct brw_reg reg)1361 static inline struct brw_reg __retype_uw(struct brw_reg reg)
1362 {
1363 	return __retype(reg, BRW_REGISTER_TYPE_UW);
1364 }
1365 
__sechalf(struct brw_reg reg)1366 static inline struct brw_reg __sechalf(struct brw_reg reg)
1367 {
1368 	if (reg.vstride)
1369 		reg.nr++;
1370 	return reg;
1371 }
1372 
__suboffset(struct brw_reg reg,unsigned delta)1373 static inline struct brw_reg __suboffset(struct brw_reg reg,
1374 					 unsigned delta)
1375 {
1376 	reg.subnr += delta * type_sz(reg.type);
1377 	return reg;
1378 }
1379 
__offset(struct brw_reg reg,unsigned delta)1380 static inline struct brw_reg __offset(struct brw_reg reg,
1381 				      unsigned delta)
1382 {
1383 	reg.nr += delta;
1384 	return reg;
1385 }
1386 
byte_offset(struct brw_reg reg,unsigned bytes)1387 static inline struct brw_reg byte_offset(struct brw_reg reg,
1388 					 unsigned bytes)
1389 {
1390 	unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
1391 	reg.nr = newoffset / REG_SIZE;
1392 	reg.subnr = newoffset % REG_SIZE;
1393 	return reg;
1394 }
1395 
1396 
1397 /** Construct unsigned word[16] register */
brw_uw16_reg(unsigned file,unsigned nr,unsigned subnr)1398 static inline struct brw_reg brw_uw16_reg(unsigned file,
1399 					  unsigned nr,
1400 					  unsigned subnr)
1401 {
1402 	return __suboffset(__retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
1403 }
1404 
1405 /** Construct unsigned word[8] register */
brw_uw8_reg(unsigned file,unsigned nr,unsigned subnr)1406 static inline struct brw_reg brw_uw8_reg(unsigned file,
1407 					 unsigned nr,
1408 					 unsigned subnr)
1409 {
1410 	return __suboffset(__retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
1411 }
1412 
1413 /** Construct unsigned word[1] register */
brw_uw1_reg(unsigned file,unsigned nr,unsigned subnr)1414 static inline struct brw_reg brw_uw1_reg(unsigned file,
1415 					 unsigned nr,
1416 					 unsigned subnr)
1417 {
1418 	return __suboffset(__retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
1419 }
1420 
brw_imm_reg(unsigned type)1421 static inline struct brw_reg brw_imm_reg(unsigned type)
1422 {
1423 	return brw_reg( BRW_IMMEDIATE_VALUE,
1424 			0,
1425 			0,
1426 			type,
1427 			BRW_VERTICAL_STRIDE_0,
1428 			BRW_WIDTH_1,
1429 			BRW_HORIZONTAL_STRIDE_0,
1430 			0,
1431 			0);
1432 }
1433 
1434 /** Construct float immediate register */
brw_imm_f(float f)1435 static inline struct brw_reg brw_imm_f(float f)
1436 {
1437 	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
1438 	imm.dw1.f = f;
1439 	return imm;
1440 }
1441 
1442 /** Construct integer immediate register */
brw_imm_d(int d)1443 static inline struct brw_reg brw_imm_d(int d)
1444 {
1445 	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
1446 	imm.dw1.d = d;
1447 	return imm;
1448 }
1449 
1450 /** Construct uint immediate register */
brw_imm_ud(unsigned ud)1451 static inline struct brw_reg brw_imm_ud(unsigned ud)
1452 {
1453 	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
1454 	imm.dw1.ud = ud;
1455 	return imm;
1456 }
1457 
1458 /** Construct ushort immediate register */
brw_imm_uw(uint16_t uw)1459 static inline struct brw_reg brw_imm_uw(uint16_t uw)
1460 {
1461 	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
1462 	imm.dw1.ud = uw | (uw << 16);
1463 	return imm;
1464 }
1465 
1466 /** Construct short immediate register */
brw_imm_w(int16_t w)1467 static inline struct brw_reg brw_imm_w(int16_t w)
1468 {
1469 	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
1470 	imm.dw1.d = w | (w << 16);
1471 	return imm;
1472 }
1473 
1474 /* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
1475  * numbers alias with _V and _VF below:
1476  */
1477 
1478 /** Construct vector of eight signed half-byte values */
brw_imm_v(unsigned v)1479 static inline struct brw_reg brw_imm_v(unsigned v)
1480 {
1481 	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
1482 	imm.vstride = BRW_VERTICAL_STRIDE_0;
1483 	imm.width = BRW_WIDTH_8;
1484 	imm.hstride = BRW_HORIZONTAL_STRIDE_1;
1485 	imm.dw1.ud = v;
1486 	return imm;
1487 }
1488 
1489 /** Construct vector of four 8-bit float values */
brw_imm_vf(unsigned v)1490 static inline struct brw_reg brw_imm_vf(unsigned v)
1491 {
1492 	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
1493 	imm.vstride = BRW_VERTICAL_STRIDE_0;
1494 	imm.width = BRW_WIDTH_4;
1495 	imm.hstride = BRW_HORIZONTAL_STRIDE_1;
1496 	imm.dw1.ud = v;
1497 	return imm;
1498 }
1499 
/* Byte-sized constants; presumably 8-bit float encodings meant as
 * brw_imm_vf4() components, with VF_NEG as the sign bit -- TODO confirm
 * against the hardware documentation.
 */
#define VF_ZERO 0x0
#define VF_ONE  0x30
#define VF_NEG  (1<<7)
1503 
brw_imm_vf4(unsigned v0,unsigned v1,unsigned v2,unsigned v3)1504 static inline struct brw_reg brw_imm_vf4(unsigned v0,
1505 					 unsigned v1,
1506 					 unsigned v2,
1507 					 unsigned v3)
1508 {
1509 	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
1510 	imm.vstride = BRW_VERTICAL_STRIDE_0;
1511 	imm.width = BRW_WIDTH_4;
1512 	imm.hstride = BRW_HORIZONTAL_STRIDE_1;
1513 	imm.dw1.ud = ((v0 << 0) |
1514 		      (v1 << 8) |
1515 		      (v2 << 16) |
1516 		      (v3 << 24));
1517 	return imm;
1518 }
1519 
brw_address(struct brw_reg reg)1520 static inline struct brw_reg brw_address(struct brw_reg reg)
1521 {
1522 	return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
1523 }
1524 
1525 /** Construct float[1] general-purpose register */
brw_vec1_grf(unsigned nr,unsigned subnr)1526 static inline struct brw_reg brw_vec1_grf(unsigned nr, unsigned subnr)
1527 {
1528 	return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1529 }
1530 
1531 /** Construct float[2] general-purpose register */
brw_vec2_grf(unsigned nr,unsigned subnr)1532 static inline struct brw_reg brw_vec2_grf(unsigned nr, unsigned subnr)
1533 {
1534 	return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1535 }
1536 
1537 /** Construct float[4] general-purpose register */
brw_vec4_grf(unsigned nr,unsigned subnr)1538 static inline struct brw_reg brw_vec4_grf(unsigned nr, unsigned subnr)
1539 {
1540 	return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1541 }
1542 
1543 /** Construct float[8] general-purpose register */
brw_vec8_grf(unsigned nr,unsigned subnr)1544 static inline struct brw_reg brw_vec8_grf(unsigned nr, unsigned subnr)
1545 {
1546 	return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1547 }
1548 
brw_uw8_grf(unsigned nr,unsigned subnr)1549 static inline struct brw_reg brw_uw8_grf(unsigned nr, unsigned subnr)
1550 {
1551 	return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1552 }
1553 
brw_uw16_grf(unsigned nr,unsigned subnr)1554 static inline struct brw_reg brw_uw16_grf(unsigned nr, unsigned subnr)
1555 {
1556 	return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1557 }
1558 
1559 /** Construct null register (usually used for setting condition codes) */
brw_null_reg(void)1560 static inline struct brw_reg brw_null_reg(void)
1561 {
1562 	return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1563 			    BRW_ARF_NULL,
1564 			    0);
1565 }
1566 
brw_address_reg(unsigned subnr)1567 static inline struct brw_reg brw_address_reg(unsigned subnr)
1568 {
1569 	return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1570 			   BRW_ARF_ADDRESS,
1571 			   subnr);
1572 }
1573 
1574 /* If/else instructions break in align16 mode if writemask & swizzle
1575  * aren't xyzw.  This goes against the convention for other scalar
1576  * regs:
1577  */
brw_ip_reg(void)1578 static inline struct brw_reg brw_ip_reg(void)
1579 {
1580 	return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1581 		       BRW_ARF_IP,
1582 		       0,
1583 		       BRW_REGISTER_TYPE_UD,
1584 		       BRW_VERTICAL_STRIDE_4, /* ? */
1585 		       BRW_WIDTH_1,
1586 		       BRW_HORIZONTAL_STRIDE_0,
1587 		       BRW_SWIZZLE_XYZW, /* NOTE! */
1588 		       WRITEMASK_XYZW); /* NOTE! */
1589 }
1590 
brw_acc_reg(void)1591 static inline struct brw_reg brw_acc_reg(void)
1592 {
1593 	return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1594 			    BRW_ARF_ACCUMULATOR,
1595 			    0);
1596 }
1597 
brw_notification_1_reg(void)1598 static inline struct brw_reg brw_notification_1_reg(void)
1599 {
1600 	return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1601 		       BRW_ARF_NOTIFICATION_COUNT,
1602 		       1,
1603 		       BRW_REGISTER_TYPE_UD,
1604 		       BRW_VERTICAL_STRIDE_0,
1605 		       BRW_WIDTH_1,
1606 		       BRW_HORIZONTAL_STRIDE_0,
1607 		       BRW_SWIZZLE_XXXX,
1608 		       WRITEMASK_X);
1609 }
1610 
brw_flag_reg(void)1611 static inline struct brw_reg brw_flag_reg(void)
1612 {
1613 	return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1614 			   BRW_ARF_FLAG,
1615 			   0);
1616 }
1617 
brw_mask_reg(unsigned subnr)1618 static inline struct brw_reg brw_mask_reg(unsigned subnr)
1619 {
1620 	return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1621 			   BRW_ARF_MASK,
1622 			   subnr);
1623 }
1624 
brw_message_reg(unsigned nr)1625 static inline struct brw_reg brw_message_reg(unsigned nr)
1626 {
1627 	assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
1628 	return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
1629 }
1630 
brw_message4_reg(unsigned nr,int subnr)1631 static inline struct brw_reg brw_message4_reg(unsigned nr,
1632 					      int subnr)
1633 {
1634 	assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
1635 	return brw_vec4_reg(BRW_MESSAGE_REGISTER_FILE, nr, subnr);
1636 }
1637 
/* Map an element count to the code __stride() stores in a region field:
 * 0 -> 0 and 1, 2, 4, 8, 16, 32 -> 1..6.  Any other input maps to 0.
 *
 * This is almost always called with a numeric constant argument, so
 * make things easy to evaluate at compile time:
 */
static inline unsigned cvt(unsigned val)
{
	switch (val) {
	case 1:
		return 1;
	case 2:
		return 2;
	case 4:
		return 3;
	case 8:
		return 4;
	case 16:
		return 5;
	case 32:
		return 6;
	default:
		/* Covers 0 as well as every value that is not listed. */
		return 0;
	}
}
1654 
__stride(struct brw_reg reg,unsigned vstride,unsigned width,unsigned hstride)1655 static inline struct brw_reg __stride(struct brw_reg reg,
1656 				    unsigned vstride,
1657 				    unsigned width,
1658 				    unsigned hstride)
1659 {
1660 	reg.vstride = cvt(vstride);
1661 	reg.width = cvt(width) - 1;
1662 	reg.hstride = cvt(hstride);
1663 	return reg;
1664 }
1665 
vec16(struct brw_reg reg)1666 static inline struct brw_reg vec16(struct brw_reg reg)
1667 {
1668 	return __stride(reg, 16,16,1);
1669 }
1670 
vec8(struct brw_reg reg)1671 static inline struct brw_reg vec8(struct brw_reg reg)
1672 {
1673 	return __stride(reg, 8,8,1);
1674 }
1675 
vec4(struct brw_reg reg)1676 static inline struct brw_reg vec4(struct brw_reg reg)
1677 {
1678 	return __stride(reg, 4,4,1);
1679 }
1680 
vec2(struct brw_reg reg)1681 static inline struct brw_reg vec2(struct brw_reg reg)
1682 {
1683 	return __stride(reg, 2,2,1);
1684 }
1685 
vec1(struct brw_reg reg)1686 static inline struct brw_reg vec1(struct brw_reg reg)
1687 {
1688 	return __stride(reg, 0,1,0);
1689 }
1690 
get_element(struct brw_reg reg,unsigned elt)1691 static inline struct brw_reg get_element(struct brw_reg reg, unsigned elt)
1692 {
1693 	return vec1(__suboffset(reg, elt));
1694 }
1695 
get_element_ud(struct brw_reg reg,unsigned elt)1696 static inline struct brw_reg get_element_ud(struct brw_reg reg, unsigned elt)
1697 {
1698 	return vec1(__suboffset(__retype(reg, BRW_REGISTER_TYPE_UD), elt));
1699 }
1700 
brw_swizzle(struct brw_reg reg,unsigned x,unsigned y,unsigned z,unsigned w)1701 static inline struct brw_reg brw_swizzle(struct brw_reg reg,
1702 					 unsigned x,
1703 					 unsigned y,
1704 					 unsigned z,
1705 					 unsigned w)
1706 {
1707 	assert(reg.file != BRW_IMMEDIATE_VALUE);
1708 
1709 	reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
1710 					    BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
1711 					    BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
1712 					    BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
1713 	return reg;
1714 }
1715 
brw_swizzle1(struct brw_reg reg,unsigned x)1716 static inline struct brw_reg brw_swizzle1(struct brw_reg reg,
1717 					  unsigned x)
1718 {
1719 	return brw_swizzle(reg, x, x, x, x);
1720 }
1721 
brw_writemask(struct brw_reg reg,unsigned mask)1722 static inline struct brw_reg brw_writemask(struct brw_reg reg,
1723 					   unsigned mask)
1724 {
1725 	assert(reg.file != BRW_IMMEDIATE_VALUE);
1726 	reg.dw1.bits.writemask &= mask;
1727 	return reg;
1728 }
1729 
brw_set_writemask(struct brw_reg reg,unsigned mask)1730 static inline struct brw_reg brw_set_writemask(struct brw_reg reg,
1731 					       unsigned mask)
1732 {
1733 	assert(reg.file != BRW_IMMEDIATE_VALUE);
1734 	reg.dw1.bits.writemask = mask;
1735 	return reg;
1736 }
1737 
brw_negate(struct brw_reg reg)1738 static inline struct brw_reg brw_negate(struct brw_reg reg)
1739 {
1740 	reg.negate ^= 1;
1741 	return reg;
1742 }
1743 
brw_abs(struct brw_reg reg)1744 static inline struct brw_reg brw_abs(struct brw_reg reg)
1745 {
1746 	reg.abs = 1;
1747 	return reg;
1748 }
1749 
1750 /***********************************************************************
1751 */
brw_vec4_indirect(unsigned subnr,int offset)1752 static inline struct brw_reg brw_vec4_indirect(unsigned subnr,
1753 					       int offset)
1754 {
1755 	struct brw_reg reg =  brw_vec4_grf(0, 0);
1756 	reg.subnr = subnr;
1757 	reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1758 	reg.dw1.bits.indirect_offset = offset;
1759 	return reg;
1760 }
1761 
brw_vec1_indirect(unsigned subnr,int offset)1762 static inline struct brw_reg brw_vec1_indirect(unsigned subnr,
1763 					       int offset)
1764 {
1765 	struct brw_reg reg =  brw_vec1_grf(0, 0);
1766 	reg.subnr = subnr;
1767 	reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1768 	reg.dw1.bits.indirect_offset = offset;
1769 	return reg;
1770 }
1771 
deref_4f(struct brw_indirect ptr,int offset)1772 static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset)
1773 {
1774 	return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1775 }
1776 
deref_1f(struct brw_indirect ptr,int offset)1777 static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset)
1778 {
1779 	return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1780 }
1781 
deref_4b(struct brw_indirect ptr,int offset)1782 static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset)
1783 {
1784 	return __retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
1785 }
1786 
deref_1uw(struct brw_indirect ptr,int offset)1787 static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset)
1788 {
1789 	return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
1790 }
1791 
deref_1d(struct brw_indirect ptr,int offset)1792 static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset)
1793 {
1794 	return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
1795 }
1796 
deref_1ud(struct brw_indirect ptr,int offset)1797 static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset)
1798 {
1799 	return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
1800 }
1801 
get_addr_reg(struct brw_indirect ptr)1802 static inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
1803 {
1804 	return brw_address_reg(ptr.addr_subnr);
1805 }
1806 
brw_indirect_offset(struct brw_indirect ptr,int offset)1807 static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset)
1808 {
1809 	ptr.addr_offset += offset;
1810 	return ptr;
1811 }
1812 
brw_indirect(unsigned addr_subnr,int offset)1813 static inline struct brw_indirect brw_indirect(unsigned addr_subnr, int offset)
1814 {
1815 	struct brw_indirect ptr;
1816 	ptr.addr_subnr = addr_subnr;
1817 	ptr.addr_offset = offset;
1818 	ptr.pad = 0;
1819 	return ptr;
1820 }
1821 
1822 /** Do two brw_regs refer to the same register? */
brw_same_reg(struct brw_reg r1,struct brw_reg r2)1823 static inline bool brw_same_reg(struct brw_reg r1, struct brw_reg r2)
1824 {
1825 	return r1.file == r2.file && r1.nr == r2.nr;
1826 }
1827 
current_insn(struct brw_compile * p)1828 static inline struct brw_instruction *current_insn( struct brw_compile *p)
1829 {
1830 	return &p->store[p->nr_insn];
1831 }
1832 
brw_set_predicate_control(struct brw_compile * p,unsigned pc)1833 static inline void brw_set_predicate_control( struct brw_compile *p, unsigned pc )
1834 {
1835 	p->current->header.predicate_control = pc;
1836 }
1837 
brw_set_predicate_inverse(struct brw_compile * p,bool predicate_inverse)1838 static inline void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse)
1839 {
1840 	p->current->header.predicate_inverse = predicate_inverse;
1841 }
1842 
brw_set_conditionalmod(struct brw_compile * p,unsigned conditional)1843 static inline void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional )
1844 {
1845 	p->current->header.destreg__conditionalmod = conditional;
1846 }
1847 
brw_set_access_mode(struct brw_compile * p,unsigned access_mode)1848 static inline void brw_set_access_mode(struct brw_compile *p, unsigned access_mode)
1849 {
1850 	p->current->header.access_mode = access_mode;
1851 }
1852 
brw_set_mask_control(struct brw_compile * p,unsigned value)1853 static inline void brw_set_mask_control(struct brw_compile *p, unsigned value)
1854 {
1855 	p->current->header.mask_control = value;
1856 }
1857 
brw_set_saturate(struct brw_compile * p,unsigned value)1858 static inline void brw_set_saturate(struct brw_compile *p, unsigned value)
1859 {
1860 	p->current->header.saturate = value;
1861 }
1862 
brw_set_acc_write_control(struct brw_compile * p,unsigned value)1863 static inline void brw_set_acc_write_control(struct brw_compile *p, unsigned value)
1864 {
1865 	if (p->gen >= 060)
1866 		p->current->header.acc_wr_control = value;
1867 }
1868 
/* Out-of-line entry points.  Only the prototypes are visible in this
 * header; see the definitions for exact semantics.
 */

/* Default instruction state management (presumably backed by the stack[]
 * and compressed_stack[] arrays in struct brw_compile -- confirm).
 */
void brw_pop_insn_state(struct brw_compile *p);
void brw_push_insn_state(struct brw_compile *p);
void brw_set_compression_control(struct brw_compile *p, enum brw_compression control);
void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value );

/* Initialise a compile context for hardware generation `gen`, emitting
 * into `store` (presumably the buffer behind brw_compile::store -- confirm).
 */
void brw_compile_init(struct brw_compile *p, int gen, void *store);

/* Operand encoders: write a brw_reg into the given instruction. */
void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
		  struct brw_reg dest);
void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
		  struct brw_reg reg);
void brw_set_src1(struct brw_compile *p,
		  struct brw_instruction *insn,
		  struct brw_reg reg);

void gen6_resolve_implied_move(struct brw_compile *p,
			       struct brw_reg *src,
			       unsigned msg_reg_nr);
1887 
1888 static inline struct brw_instruction *
brw_next_insn(struct brw_compile * p,unsigned opcode)1889 brw_next_insn(struct brw_compile *p, unsigned opcode)
1890 {
1891 	struct brw_instruction *insn;
1892 
1893 	assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
1894 
1895 	insn = &p->store[p->nr_insn++];
1896 	*insn = *p->current;
1897 
1898 	if (p->current->header.destreg__conditionalmod) {
1899 		p->current->header.destreg__conditionalmod = 0;
1900 		p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1901 	}
1902 
1903 	insn->header.opcode = opcode;
1904 	return insn;
1905 }
1906 
/* Helpers for regular instructions:
 *
 * ALU1(OP) defines brw_OP(p, dest, src0), a one-source emitter that forwards
 * to brw_alu1() with BRW_OPCODE_OP and returns the emitted instruction.
 */
#define ALU1(OP)							\
static inline struct brw_instruction *					\
brw_##OP(struct brw_compile *p,						\
	 struct brw_reg dest, struct brw_reg src0)			\
{									\
	return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);		\
}
1915 
/* ALU2(OP) defines brw_OP(p, dest, src0, src1), a two-source emitter that
 * forwards to brw_alu2() with BRW_OPCODE_OP and returns the emitted
 * instruction.
 */
#define ALU2(OP)							\
static inline struct brw_instruction *					\
brw_##OP(struct brw_compile *p, struct brw_reg dest,			\
	 struct brw_reg src0, struct brw_reg src1)			\
{									\
	return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);		\
}
1924 
/* Rounding operations (other than RNDD) require two instructions: the first
 * stores a rounded value (possibly rounded the wrong way) in the dest
 * register and also sets a per-channel "increment bit" in the flag register;
 * a predicated add of 1.0 then fixes dest up to the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD, so the
 * fix-up is only emitted when p->gen is below 060.
 *
 * ROUND(OP) defines brw_OP(p, dest, src), which returns nothing.
 */
#define ROUND(OP)							\
static inline void							\
brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src) \
{									\
	struct brw_instruction *round_insn =				\
		brw_next_insn(p, BRW_OPCODE_##OP);			\
									\
	brw_set_dest(p, round_insn, dest);				\
	brw_set_src0(p, round_insn, src);				\
	if (p->gen < 060) {						\
		struct brw_instruction *fixup;				\
									\
		/* turn on round-increments */				\
		round_insn->header.destreg__conditionalmod =		\
			BRW_CONDITIONAL_R;				\
		fixup = brw_ADD(p, dest, dest, brw_imm_f(1.0f));	\
		fixup->header.predicate_control = BRW_PREDICATE_NORMAL;	\
	}								\
}
1948 
/* Emit a one-source instruction: claim the next instruction slot for
 * 'opcode', then encode the destination and the single source operand.
 *
 * Returns the emitted instruction so callers can adjust its header.
 */
static inline struct brw_instruction *
brw_alu1(struct brw_compile *p, unsigned opcode,
	 struct brw_reg dest, struct brw_reg src)
{
	struct brw_instruction *inst;

	inst = brw_next_insn(p, opcode);
	brw_set_dest(p, inst, dest);
	brw_set_src0(p, inst, src);

	return inst;
}
1959 
/* Emit a two-source instruction: claim the next instruction slot for
 * 'opcode', then encode the destination and both source operands.
 *
 * Returns the emitted instruction so callers can adjust its header.
 */
static inline struct brw_instruction *
brw_alu2(struct brw_compile *p, unsigned opcode,
	 struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
{
	struct brw_instruction *inst;

	inst = brw_next_insn(p, opcode);
	brw_set_dest(p, inst, dest);
	brw_set_src0(p, inst, src0);
	brw_set_src1(p, inst, src1);

	return inst;
}
1972 
brw_ADD(struct brw_compile * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)1973 static inline struct brw_instruction *brw_ADD(struct brw_compile *p,
1974 					      struct brw_reg dest,
1975 					      struct brw_reg src0,
1976 					      struct brw_reg src1)
1977 {
1978 	/* 6.2.2: add */
1979 	if (src0.type == BRW_REGISTER_TYPE_F ||
1980 	    (src0.file == BRW_IMMEDIATE_VALUE &&
1981 	     src0.type == BRW_REGISTER_TYPE_VF)) {
1982 		assert(src1.type != BRW_REGISTER_TYPE_UD);
1983 		assert(src1.type != BRW_REGISTER_TYPE_D);
1984 	}
1985 
1986 	if (src1.type == BRW_REGISTER_TYPE_F ||
1987 	    (src1.file == BRW_IMMEDIATE_VALUE &&
1988 	     src1.type == BRW_REGISTER_TYPE_VF)) {
1989 		assert(src0.type != BRW_REGISTER_TYPE_UD);
1990 		assert(src0.type != BRW_REGISTER_TYPE_D);
1991 	}
1992 
1993 	return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
1994 }
1995 
brw_MUL(struct brw_compile * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)1996 static inline struct brw_instruction *brw_MUL(struct brw_compile *p,
1997 					      struct brw_reg dest,
1998 					      struct brw_reg src0,
1999 					      struct brw_reg src1)
2000 {
2001 	/* 6.32.38: mul */
2002 	if (src0.type == BRW_REGISTER_TYPE_D ||
2003 	    src0.type == BRW_REGISTER_TYPE_UD ||
2004 	    src1.type == BRW_REGISTER_TYPE_D ||
2005 	    src1.type == BRW_REGISTER_TYPE_UD) {
2006 		assert(dest.type != BRW_REGISTER_TYPE_F);
2007 	}
2008 
2009 	if (src0.type == BRW_REGISTER_TYPE_F ||
2010 	    (src0.file == BRW_IMMEDIATE_VALUE &&
2011 	     src0.type == BRW_REGISTER_TYPE_VF)) {
2012 		assert(src1.type != BRW_REGISTER_TYPE_UD);
2013 		assert(src1.type != BRW_REGISTER_TYPE_D);
2014 	}
2015 
2016 	if (src1.type == BRW_REGISTER_TYPE_F ||
2017 	    (src1.file == BRW_IMMEDIATE_VALUE &&
2018 	     src1.type == BRW_REGISTER_TYPE_VF)) {
2019 		assert(src0.type != BRW_REGISTER_TYPE_UD);
2020 		assert(src0.type != BRW_REGISTER_TYPE_D);
2021 	}
2022 
2023 	assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
2024 	       src0.nr != BRW_ARF_ACCUMULATOR);
2025 	assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
2026 	       src1.nr != BRW_ARF_ACCUMULATOR);
2027 
2028 	return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
2029 }
2030 
brw_JMPI(struct brw_compile * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)2031 static inline struct brw_instruction *brw_JMPI(struct brw_compile *p,
2032 					       struct brw_reg dest,
2033 					       struct brw_reg src0,
2034 					       struct brw_reg src1)
2035 {
2036 	struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
2037 
2038 	insn->header.execution_size = 1;
2039 	insn->header.compression_control = BRW_COMPRESSION_NONE;
2040 	insn->header.mask_control = BRW_MASK_DISABLE;
2041 
2042 	p->current->header.predicate_control = BRW_PREDICATE_NONE;
2043 
2044 	return insn;
2045 }
2046 
2047 
2048 ALU1(MOV);
2049 ALU2(SEL);
2050 ALU1(NOT);
2051 ALU2(AND);
2052 ALU2(OR);
2053 ALU2(XOR);
2054 ALU2(SHR);
2055 ALU2(SHL);
2056 ALU2(RSR);
2057 ALU2(RSL);
2058 ALU2(ASR);
2059 ALU1(FRC);
2060 ALU1(RNDD);
2061 ALU2(MAC);
2062 ALU2(MACH);
2063 ALU1(LZD);
2064 ALU2(DP4);
2065 ALU2(DPH);
2066 ALU2(DP3);
2067 ALU2(DP2);
2068 ALU2(LINE);
2069 ALU2(PLN);
2070 
2071 ROUND(RNDZ);
2072 ROUND(RNDE);
2073 
2074 #undef ALU1
2075 #undef ALU2
2076 #undef ROUND
2077 
/* Only prototypes are visible in this part of the header; parameter meanings
 * beyond their names must be checked against the definitions.
 */
2078 /* Helpers for SEND instruction */
2079 void brw_set_dp_read_message(struct brw_compile *p,
2080 			     struct brw_instruction *insn,
2081 			     unsigned binding_table_index,
2082 			     unsigned msg_control,
2083 			     unsigned msg_type,
2084 			     unsigned target_cache,
2085 			     unsigned msg_length,
2086 			     unsigned response_length);
2087 
2088 void brw_set_dp_write_message(struct brw_compile *p,
2089 			      struct brw_instruction *insn,
2090 			      unsigned binding_table_index,
2091 			      unsigned msg_control,
2092 			      unsigned msg_type,
2093 			      unsigned msg_length,
2094 			      bool header_present,
2095 			      bool last_render_target,
2096 			      unsigned response_length,
2097 			      bool end_of_thread,
2098 			      bool send_commit_msg);
2099 
2100 void brw_urb_WRITE(struct brw_compile *p,
2101 		   struct brw_reg dest,
2102 		   unsigned msg_reg_nr,
2103 		   struct brw_reg src0,
2104 		   bool allocate,
2105 		   bool used,
2106 		   unsigned msg_length,
2107 		   unsigned response_length,
2108 		   bool eot,
2109 		   bool writes_complete,
2110 		   unsigned offset,
2111 		   unsigned swizzle);
2112 
2113 void brw_ff_sync(struct brw_compile *p,
2114 		 struct brw_reg dest,
2115 		 unsigned msg_reg_nr,
2116 		 struct brw_reg src0,
2117 		 bool allocate,
2118 		 unsigned response_length,
2119 		 bool eot);
2120 
2121 void brw_fb_WRITE(struct brw_compile *p,
2122 		  int dispatch_width,
2123                   unsigned msg_reg_nr,
2124                   struct brw_reg src0,
2125                   unsigned msg_control,
2126                   unsigned binding_table_index,
2127                   unsigned msg_length,
2128                   unsigned response_length,
2129                   bool eot,
2130                   bool header_present);
2131 
/* Note the order: response_length comes before msg_length here, the reverse
 * of brw_fb_WRITE() and brw_set_dp_read_message() above.
 */
2132 void brw_SAMPLE(struct brw_compile *p,
2133 		struct brw_reg dest,
2134 		unsigned msg_reg_nr,
2135 		struct brw_reg src0,
2136 		unsigned binding_table_index,
2137 		unsigned sampler,
2138 		unsigned writemask,
2139 		unsigned msg_type,
2140 		unsigned response_length,
2141 		unsigned msg_length,
2142 		bool header_present,
2143 		unsigned simd_mode);
2144 
/* Unlike brw_math() below, this variant takes no data_type argument. */
2145 void brw_math_16(struct brw_compile *p,
2146 		 struct brw_reg dest,
2147 		 unsigned function,
2148 		 unsigned saturate,
2149 		 unsigned msg_reg_nr,
2150 		 struct brw_reg src,
2151 		 unsigned precision);
2152 
/* Both trailing arguments are plain 'unsigned', so the compiler cannot catch
 * a swap: data_type comes first, precision second.
 */
2153 void brw_math(struct brw_compile *p,
2154 	      struct brw_reg dest,
2155 	      unsigned function,
2156 	      unsigned saturate,
2157 	      unsigned msg_reg_nr,
2158 	      struct brw_reg src,
2159 	      unsigned data_type,
2160 	      unsigned precision);
2161 
2162 void brw_math2(struct brw_compile *p,
2163 	       struct brw_reg dest,
2164 	       unsigned function,
2165 	       struct brw_reg src0,
2166 	       struct brw_reg src1);
2167 
2168 void brw_oword_block_read(struct brw_compile *p,
2169 			  struct brw_reg dest,
2170 			  struct brw_reg mrf,
2171 			  uint32_t offset,
2172 			  uint32_t bind_table_index);
2173 
2174 void brw_oword_block_read_scratch(struct brw_compile *p,
2175 				  struct brw_reg dest,
2176 				  struct brw_reg mrf,
2177 				  int num_regs,
2178 				  unsigned offset);
2179 
2180 void brw_oword_block_write_scratch(struct brw_compile *p,
2181 				   struct brw_reg mrf,
2182 				   int num_regs,
2183 				   unsigned offset);
2184 
2185 void brw_dword_scattered_read(struct brw_compile *p,
2186 			      struct brw_reg dest,
2187 			      struct brw_reg mrf,
2188 			      uint32_t bind_table_index);
2189 
2190 void brw_dp_READ_4_vs(struct brw_compile *p,
2191 		      struct brw_reg dest,
2192 		      unsigned location,
2193 		      unsigned bind_table_index);
2194 
2195 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
2196 			       struct brw_reg dest,
2197 			       struct brw_reg addrReg,
2198 			       unsigned offset,
2199 			       unsigned bind_table_index);
2200 
2201 /* If/else/endif.  Works by manipulating the execution flags on each
2202  * channel.
2203  */
2204 struct brw_instruction *brw_IF(struct brw_compile *p,
2205 			       unsigned execute_size);
2206 struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
2207 				struct brw_reg src0, struct brw_reg src1);
2208 
2209 void brw_ELSE(struct brw_compile *p);
2210 void brw_ENDIF(struct brw_compile *p);
2211 
2212 /* DO/WHILE loops:
2213 */
2214 struct brw_instruction *brw_DO(struct brw_compile *p,
2215 			       unsigned execute_size);
2216 
/* NOTE(review): patch_insn is presumably the instruction returned by the
 * matching brw_DO() -- confirm against the definition.
 */
2217 struct brw_instruction *brw_WHILE(struct brw_compile *p,
2218 				  struct brw_instruction *patch_insn);
2219 
2220 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count);
2221 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count);
2222 struct brw_instruction *gen6_CONT(struct brw_compile *p,
2223 				  struct brw_instruction *do_insn);
2224 /* Forward jumps:
2225 */
/* NOTE(review): jmp_insn is presumably an instruction previously returned by
 * brw_JMPI() -- confirm against the definition.
 */
2226 void brw_land_fwd_jump(struct brw_compile *p,
2227 		       struct brw_instruction *jmp_insn);
2228 
2229 void brw_NOP(struct brw_compile *p);
2230 
2231 void brw_WAIT(struct brw_compile *p);
2232 
2233 /* Special case: there is never a destination, execution size will be
2234  * taken from src0:
2235  */
/* NOTE(review): the comment above says there is never a destination, yet the
 * prototype below takes a 'dest' register -- confirm how the definition uses
 * it before relying on either statement.
 */
2236 void brw_CMP(struct brw_compile *p,
2237 	     struct brw_reg dest,
2238 	     unsigned conditional,
2239 	     struct brw_reg src0,
2240 	     struct brw_reg src1);
2241 
brw_math_invert(struct brw_compile * p,struct brw_reg dst,struct brw_reg src)2242 static inline void brw_math_invert(struct brw_compile *p,
2243 				   struct brw_reg dst,
2244 				   struct brw_reg src)
2245 {
2246 	brw_math(p,
2247 		 dst,
2248 		 BRW_MATH_FUNCTION_INV,
2249 		 BRW_MATH_SATURATE_NONE,
2250 		 0,
2251 		 src,
2252 		 BRW_MATH_PRECISION_FULL,
2253 		 BRW_MATH_DATA_VECTOR);
2254 }
2255 
/* Remaining entry points; only the prototypes are visible in this part of
 * the header.
 */
2256 void brw_set_uip_jip(struct brw_compile *p);
2257 
/* Takes and returns a conditional-modifier code as a uint32_t. */
2258 uint32_t brw_swap_cmod(uint32_t cmod);
2259 
/* Takes one instruction (read-only), the stream to write to and the
 * hardware generation.
 * NOTE(review): 'gen' presumably uses the same encoding as p->gen, which
 * this header compares against octal literals such as 060 -- confirm.
 */
2260 void brw_disasm(FILE *file,
2261 		const struct brw_instruction *inst,
2262 		int gen);
2263 
2264 #endif /* BRW_EU_H */
2265