1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32 #ifndef BRW_EU_H
33 #define BRW_EU_H
34
35 #include <stdbool.h>
36 #include <stdint.h>
37 #include <stdio.h>
38
39 #include <assert.h>
40
/**
 * Pack four 2-bit channel selectors into one swizzle value.
 *
 * Selector `a` occupies bits 1:0, `b` bits 3:2, `c` bits 5:4 and `d`
 * bits 7:6.  The arguments are not masked, so each one must already be
 * in the range 0-3.
 */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))

/** Extract the 2-bit selector at position `idx` (0-3) from a packed swizzle. */
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

/* Named swizzles; selector values 0, 1, 2, 3 stand for X, Y, Z, W. */
#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
51
/* Channel write-mask bits: one bit per channel, X in bit 0 up to W in bit 3. */
#define WRITEMASK_X     0x1
#define WRITEMASK_Y     0x2
#define WRITEMASK_Z     0x4
#define WRITEMASK_W     0x8

/* Frequently used combinations of the single-channel bits above. */
#define WRITEMASK_XY    (WRITEMASK_X | WRITEMASK_Y)
#define WRITEMASK_XYZ   (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z)
#define WRITEMASK_XYZW  (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W)
60
/** Number of general purpose registers (VS, WM, etc) */
#define BRW_MAX_GRF 128

/** Number of message register file registers */
#define BRW_MAX_MRF 16


/* Align1 / Align16 mode selectors. */
#define BRW_ALIGN_1   0
#define BRW_ALIGN_16  1

/* Addressing-mode selectors: direct, or register-indirect. */
#define BRW_ADDRESS_DIRECT                        0
#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER    1

/* Channel indices, X = 0 through W = 3. */
#define BRW_CHANNEL_X     0
#define BRW_CHANNEL_Y     1
#define BRW_CHANNEL_Z     2
#define BRW_CHANNEL_W     3
78
/**
 * Compression selectors.
 *
 * The enumerators rely on implicit numbering, so they evaluate to
 * 0 (NONE), 1 (2NDHALF) and 2 (COMPRESSED); keep the order stable.
 */
enum brw_compression {
   BRW_COMPRESSION_NONE,
   BRW_COMPRESSION_2NDHALF,
   BRW_COMPRESSION_COMPRESSED,
};
84
/*
 * Gen6 quarter (nQ) and half (nH) selectors.  1H shares the encoding of
 * 1Q, and 2H shares the encoding of 3Q.
 */
#define GEN6_COMPRESSION_1Q		0
#define GEN6_COMPRESSION_2Q		1
#define GEN6_COMPRESSION_3Q		2
#define GEN6_COMPRESSION_4Q		3
#define GEN6_COMPRESSION_1H		0
#define GEN6_COMPRESSION_2H		2

/* Conditional-modifier encodings; EQ and NEQ are aliases of Z and NZ. */
#define BRW_CONDITIONAL_NONE  0
#define BRW_CONDITIONAL_Z     1
#define BRW_CONDITIONAL_NZ    2
#define BRW_CONDITIONAL_EQ    1	/* Z */
#define BRW_CONDITIONAL_NEQ   2	/* NZ */
#define BRW_CONDITIONAL_G     3
#define BRW_CONDITIONAL_GE    4
#define BRW_CONDITIONAL_L     5
#define BRW_CONDITIONAL_LE    6
#define BRW_CONDITIONAL_R     7
#define BRW_CONDITIONAL_O     8
#define BRW_CONDITIONAL_U     9

/* Debug-control encodings. */
#define BRW_DEBUG_NONE        0
#define BRW_DEBUG_BREAKPOINT  1

/* Dependency-control encodings. */
#define BRW_DEPENDENCY_NORMAL         0
#define BRW_DEPENDENCY_NOTCLEARED     1
#define BRW_DEPENDENCY_NOTCHECKED     2
#define BRW_DEPENDENCY_DISABLE        3

/* Execution sizes; the encoding is log2 of the size named in the macro. */
#define BRW_EXECUTE_1     0
#define BRW_EXECUTE_2     1
#define BRW_EXECUTE_4     2
#define BRW_EXECUTE_8     3
#define BRW_EXECUTE_16    4
#define BRW_EXECUTE_32    5

/* Horizontal strides; 0 encodes stride 0, otherwise log2(stride) + 1. */
#define BRW_HORIZONTAL_STRIDE_0   0
#define BRW_HORIZONTAL_STRIDE_1   1
#define BRW_HORIZONTAL_STRIDE_2   2
#define BRW_HORIZONTAL_STRIDE_4   3

/* Saturate flag values. */
#define BRW_INSTRUCTION_NORMAL    0
#define BRW_INSTRUCTION_SATURATE  1

/* Mask-control values. */
#define BRW_MASK_ENABLE   0
#define BRW_MASK_DISABLE  1
130
/** @{
 *
 * Gen6 has replaced "mask enable/disable" with WECtrl, which is
 * effectively the same but much simpler to think about.  Now, there
 * are two contributors ANDed together to whether channels are
 * executed: The predication on the instruction, and the channel write
 * enable.
 */
/**
 * This is the default value.  It means that a channel's write enable is set
 * if the per-channel IP is pointing at this instruction.
 */
#define BRW_WE_NORMAL		0
/**
 * This is used like BRW_MASK_DISABLE, and causes all channels to have
 * their write enable set.  Note that predication still contributes to
 * whether the channel actually gets written.
 */
#define BRW_WE_ALL		1
/** @} */
151
/**
 * Instruction opcodes.
 *
 * The first group holds real hardware encodings; the largest is 126, so
 * all of them fit the 7-bit `opcode` field of the instruction header.
 * The second group holds compiler-internal opcodes.  They start at 128,
 * which lies outside the 7-bit range and therefore cannot collide with a
 * hardware encoding.  After FS_OPCODE_FB_WRITE they are numbered
 * implicitly, so their order determines their values.
 */
enum opcode {
   /* These are the actual hardware opcodes. */
   BRW_OPCODE_MOV =	1,
   BRW_OPCODE_SEL =	2,
   BRW_OPCODE_NOT =	4,
   BRW_OPCODE_AND =	5,
   BRW_OPCODE_OR =	6,
   BRW_OPCODE_XOR =	7,
   BRW_OPCODE_SHR =	8,
   BRW_OPCODE_SHL =	9,
   BRW_OPCODE_RSR =	10,
   BRW_OPCODE_RSL =	11,
   BRW_OPCODE_ASR =	12,
   BRW_OPCODE_CMP =	16,
   BRW_OPCODE_CMPN =	17,
   BRW_OPCODE_JMPI =	32,
   BRW_OPCODE_IF =	34,
   BRW_OPCODE_IFF =	35,
   BRW_OPCODE_ELSE =	36,
   BRW_OPCODE_ENDIF =	37,
   BRW_OPCODE_DO =	38,
   BRW_OPCODE_WHILE =	39,
   BRW_OPCODE_BREAK =	40,
   BRW_OPCODE_CONTINUE = 41,
   BRW_OPCODE_HALT =	42,
   BRW_OPCODE_MSAVE =	44,
   BRW_OPCODE_MRESTORE = 45,
   BRW_OPCODE_PUSH =	46,
   BRW_OPCODE_POP =	47,
   BRW_OPCODE_WAIT =	48,
   BRW_OPCODE_SEND =	49,
   BRW_OPCODE_SENDC =	50,
   BRW_OPCODE_MATH =	56,
   BRW_OPCODE_ADD =	64,
   BRW_OPCODE_MUL =	65,
   BRW_OPCODE_AVG =	66,
   BRW_OPCODE_FRC =	67,
   BRW_OPCODE_RNDU =	68,
   BRW_OPCODE_RNDD =	69,
   BRW_OPCODE_RNDE =	70,
   BRW_OPCODE_RNDZ =	71,
   BRW_OPCODE_MAC =	72,
   BRW_OPCODE_MACH =	73,
   BRW_OPCODE_LZD =	74,
   BRW_OPCODE_SAD2 =	80,
   BRW_OPCODE_SADA2 =	81,
   BRW_OPCODE_DP4 =	84,
   BRW_OPCODE_DPH =	85,
   BRW_OPCODE_DP3 =	86,
   BRW_OPCODE_DP2 =	87,
   BRW_OPCODE_DPA2 =	88,
   BRW_OPCODE_LINE =	89,
   BRW_OPCODE_PLN =	90,
   BRW_OPCODE_NOP =	126,

   /* These are compiler backend opcodes that get translated into other
    * instructions.
    */
   FS_OPCODE_FB_WRITE = 128,
   SHADER_OPCODE_RCP,
   SHADER_OPCODE_RSQ,
   SHADER_OPCODE_SQRT,
   SHADER_OPCODE_EXP2,
   SHADER_OPCODE_LOG2,
   SHADER_OPCODE_POW,
   SHADER_OPCODE_SIN,
   SHADER_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_PIXEL_X,
   FS_OPCODE_PIXEL_Y,
   FS_OPCODE_CINTERP,
   FS_OPCODE_LINTERP,
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXD,
   FS_OPCODE_TXF,
   FS_OPCODE_TXL,
   FS_OPCODE_TXS,
   FS_OPCODE_DISCARD,
   FS_OPCODE_SPILL,
   FS_OPCODE_UNSPILL,
   FS_OPCODE_PULL_CONSTANT_LOAD,

   VS_OPCODE_URB_WRITE,
   VS_OPCODE_SCRATCH_READ,
   VS_OPCODE_SCRATCH_WRITE,
   VS_OPCODE_PULL_CONSTANT_LOAD,
};
241
/*
 * Predicate-control encodings.  NONE and NORMAL are common to both modes;
 * the ALIGN1_* and ALIGN16_* names reuse encodings from 2 upwards with
 * mode-specific meanings.
 */
#define BRW_PREDICATE_NONE             0
#define BRW_PREDICATE_NORMAL           1
#define BRW_PREDICATE_ALIGN1_ANYV             2
#define BRW_PREDICATE_ALIGN1_ALLV             3
#define BRW_PREDICATE_ALIGN1_ANY2H            4
#define BRW_PREDICATE_ALIGN1_ALL2H            5
#define BRW_PREDICATE_ALIGN1_ANY4H            6
#define BRW_PREDICATE_ALIGN1_ALL4H            7
#define BRW_PREDICATE_ALIGN1_ANY8H            8
#define BRW_PREDICATE_ALIGN1_ALL8H            9
#define BRW_PREDICATE_ALIGN1_ANY16H           10
#define BRW_PREDICATE_ALIGN1_ALL16H           11
#define BRW_PREDICATE_ALIGN16_REPLICATE_X     2
#define BRW_PREDICATE_ALIGN16_REPLICATE_Y     3
#define BRW_PREDICATE_ALIGN16_REPLICATE_Z     4
#define BRW_PREDICATE_ALIGN16_REPLICATE_W     5
#define BRW_PREDICATE_ALIGN16_ANY4H           6
#define BRW_PREDICATE_ALIGN16_ALL4H           7

/* Register file selectors. */
#define BRW_ARCHITECTURE_REGISTER_FILE    0
#define BRW_GENERAL_REGISTER_FILE         1
#define BRW_MESSAGE_REGISTER_FILE         2
#define BRW_IMMEDIATE_VALUE               3

/*
 * Register data types.  Encodings 5 and 6 each carry two names: B/VF
 * share 5 and HF/V share 6 (see the per-line notes on VF and V).
 */
#define BRW_REGISTER_TYPE_UD  0
#define BRW_REGISTER_TYPE_D   1
#define BRW_REGISTER_TYPE_UW  2
#define BRW_REGISTER_TYPE_W   3
#define BRW_REGISTER_TYPE_UB  4
#define BRW_REGISTER_TYPE_B   5
#define BRW_REGISTER_TYPE_VF  5	/* packed float vector, immediates only? */
#define BRW_REGISTER_TYPE_HF  6
#define BRW_REGISTER_TYPE_V   6	/* packed int vector, immediates only, uword dest only */
#define BRW_REGISTER_TYPE_F   7

/* Architecture register selectors; every value is a multiple of 0x10. */
#define BRW_ARF_NULL                  0x00
#define BRW_ARF_ADDRESS               0x10
#define BRW_ARF_ACCUMULATOR           0x20
#define BRW_ARF_FLAG                  0x30
#define BRW_ARF_MASK                  0x40
#define BRW_ARF_MASK_STACK            0x50
#define BRW_ARF_MASK_STACK_DEPTH      0x60
#define BRW_ARF_STATE                 0x70
#define BRW_ARF_CONTROL               0x80
#define BRW_ARF_NOTIFICATION_COUNT    0x90
#define BRW_ARF_IP                    0xA0

/* Flag bit (bit 7) for a message register number. */
#define BRW_MRF_COMPR4			(1 << 7)

/* Selectors for the A, I, L and C masks. */
#define BRW_AMASK   0
#define BRW_IMASK   1
#define BRW_LMASK   2
#define BRW_CMASK   3

/* Thread-control encodings. */
#define BRW_THREAD_NORMAL     0
#define BRW_THREAD_ATOMIC     1
#define BRW_THREAD_SWITCH     2

/*
 * Vertical strides; 0 encodes stride 0, otherwise log2(stride) + 1.
 * ONE_DIMENSIONAL uses the separate value 0xF.
 */
#define BRW_VERTICAL_STRIDE_0                 0
#define BRW_VERTICAL_STRIDE_1                 1
#define BRW_VERTICAL_STRIDE_2                 2
#define BRW_VERTICAL_STRIDE_4                 3
#define BRW_VERTICAL_STRIDE_8                 4
#define BRW_VERTICAL_STRIDE_16                5
#define BRW_VERTICAL_STRIDE_32                6
#define BRW_VERTICAL_STRIDE_64                7
#define BRW_VERTICAL_STRIDE_128               8
#define BRW_VERTICAL_STRIDE_256               9
#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF

/* Region widths; the encoding is log2 of the width named in the macro. */
#define BRW_WIDTH_1       0
#define BRW_WIDTH_2       1
#define BRW_WIDTH_4       2
#define BRW_WIDTH_8       3
#define BRW_WIDTH_16      4

/* Stateless buffer boundaries; the encoding is log2(size / 1K). */
#define BRW_STATELESS_BUFFER_BOUNDARY_1K      0
#define BRW_STATELESS_BUFFER_BOUNDARY_2K      1
#define BRW_STATELESS_BUFFER_BOUNDARY_4K      2
#define BRW_STATELESS_BUFFER_BOUNDARY_8K      3
#define BRW_STATELESS_BUFFER_BOUNDARY_16K     4
#define BRW_STATELESS_BUFFER_BOUNDARY_32K     5
#define BRW_STATELESS_BUFFER_BOUNDARY_64K     6
#define BRW_STATELESS_BUFFER_BOUNDARY_128K    7
#define BRW_STATELESS_BUFFER_BOUNDARY_256K    8
#define BRW_STATELESS_BUFFER_BOUNDARY_512K    9
#define BRW_STATELESS_BUFFER_BOUNDARY_1M      10
#define BRW_STATELESS_BUFFER_BOUNDARY_2M      11

/* Polygon facing. */
#define BRW_POLYGON_FACING_FRONT      0
#define BRW_POLYGON_FACING_BACK       1
333
/* Message targets, macro spelling. */
#define BRW_MESSAGE_TARGET_NULL               0
#define BRW_MESSAGE_TARGET_MATH               1 /* reserved on GEN6 */
#define BRW_MESSAGE_TARGET_SAMPLER            2
#define BRW_MESSAGE_TARGET_GATEWAY            3
#define BRW_MESSAGE_TARGET_DATAPORT_READ      4
#define BRW_MESSAGE_TARGET_DATAPORT_WRITE     5
#define BRW_MESSAGE_TARGET_URB                6
#define BRW_MESSAGE_TARGET_THREAD_SPAWNER     7

/* Gen6 names; 4 and 5 reuse the DATAPORT_READ / DATAPORT_WRITE encodings. */
#define GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE  4
#define GEN6_MESSAGE_TARGET_DP_RENDER_CACHE   5
#define GEN6_MESSAGE_TARGET_DP_CONST_CACHE    9

/* Sampler return formats (the value 1 is not defined here). */
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32     0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32      2
#define BRW_SAMPLER_RETURN_FORMAT_SINT32      3

/*
 * Sampler message types.  Several names share an encoding; each name
 * includes a SIMD mode, so presumably the number is interpreted per
 * mode -- confirm against the PRM before relying on it.
 */
#define BRW_SAMPLER_MESSAGE_SAMPLE	              0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE              0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE             0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS        0
#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX             1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD        1
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD         1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS  2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS    2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE    0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE     2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE  1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3
#define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD                 3

/* Gen5 sampler message types. */
#define GEN5_SAMPLER_MESSAGE_SAMPLE              0
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS         1
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD          2
#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE      3
#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS       4
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE  6
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD           7
#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO      10

/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2                   0
#define BRW_SAMPLER_SIMD_MODE_SIMD8                     1
#define BRW_SAMPLER_SIMD_MODE_SIMD16                    2
#define BRW_SAMPLER_SIMD_MODE_SIMD32_64                 3

/* OWord block sizes. */
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS     3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS     4

/* OWord dual-block sizes. */
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD     0
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS    2

/* DWord scattered block sizes. */
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS   2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS  3

/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ          0
/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     1
#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      3
/* G45, GEN5 */
#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ	    1
#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     2
#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ	    3
#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          4
#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      6
/* GEN6 */
#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ	    1
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     2
#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          4
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ  5
#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      6

/* Target caches for dataport reads. */
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE      0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE    1
#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE   2

/* Render-target write variants. */
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE                0
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED     1
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01         2
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23         3
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01       4
/**
 * Message target: Shared Function ID for where to SEND a message.
 *
 * These are enumerated in the ISA reference under "send - Send Message".
 * In particular, see the following tables:
 * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition"
 * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor"
 * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) /
 *   Overview / GPE Function IDs
 *
 * The GEN6_* enumerators deliberately reuse values 4 and 5, so they
 * compare equal to BRW_SFID_DATAPORT_READ and BRW_SFID_DATAPORT_WRITE.
 */
enum brw_message_target {
   BRW_SFID_NULL                     = 0,
   BRW_SFID_MATH                     = 1, /* Only valid on Gen4-5 */
   BRW_SFID_SAMPLER                  = 2,
   BRW_SFID_MESSAGE_GATEWAY          = 3,
   BRW_SFID_DATAPORT_READ            = 4,
   BRW_SFID_DATAPORT_WRITE           = 5,
   BRW_SFID_URB                      = 6,
   BRW_SFID_THREAD_SPAWNER           = 7,

   GEN6_SFID_DATAPORT_SAMPLER_CACHE  = 4,
   GEN6_SFID_DATAPORT_RENDER_CACHE   = 5,
   GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,

   GEN7_SFID_DATAPORT_DATA_CACHE     = 10,
};
453
/*
 * Macro spelling of the Gen7 data-cache message target; same value as
 * GEN7_SFID_DATAPORT_DATA_CACHE in enum brw_message_target.
 */
#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE     10

/* Dataport write message types. */
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE                0
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE           1
#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE                2
#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE            3
#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE              4
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE     5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE               7

/* GEN6 */
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE              7
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE               8
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE          9
#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE               10
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE           11
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE             12
#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE               13
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE       14

/* Math function selectors; TAN (gen4) and FDIV (gen6+) share encoding 9. */
#define BRW_MATH_FUNCTION_INV                              1
#define BRW_MATH_FUNCTION_LOG                              2
#define BRW_MATH_FUNCTION_EXP                              3
#define BRW_MATH_FUNCTION_SQRT                             4
#define BRW_MATH_FUNCTION_RSQ                              5
#define BRW_MATH_FUNCTION_SIN                              6 /* was 7 */
#define BRW_MATH_FUNCTION_COS                              7 /* was 8 */
#define BRW_MATH_FUNCTION_SINCOS                           8 /* was 6 */
#define BRW_MATH_FUNCTION_TAN                              9 /* gen4 */
#define BRW_MATH_FUNCTION_FDIV                             9 /* gen6+ */
#define BRW_MATH_FUNCTION_POW                              10
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER   11
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER                13

/* Math integer signedness. */
#define BRW_MATH_INTEGER_UNSIGNED     0
#define BRW_MATH_INTEGER_SIGNED       1

/* Math precision. */
#define BRW_MATH_PRECISION_FULL        0
#define BRW_MATH_PRECISION_PARTIAL     1

/* Math saturate flag. */
#define BRW_MATH_SATURATE_NONE         0
#define BRW_MATH_SATURATE_SATURATE     1

/* Math data layout. */
#define BRW_MATH_DATA_VECTOR  0
#define BRW_MATH_DATA_SCALAR  1

/* URB opcode. */
#define BRW_URB_OPCODE_WRITE  0

/* URB swizzle control. */
#define BRW_URB_SWIZZLE_NONE          0
#define BRW_URB_SWIZZLE_INTERLEAVE    1
#define BRW_URB_SWIZZLE_TRANSPOSE     2

/* Scratch space sizes; the encoding is log2(size / 1K). */
#define BRW_SCRATCH_SPACE_SIZE_1K     0
#define BRW_SCRATCH_SPACE_SIZE_2K     1
#define BRW_SCRATCH_SPACE_SIZE_4K     2
#define BRW_SCRATCH_SPACE_SIZE_8K     3
#define BRW_SCRATCH_SPACE_SIZE_16K    4
#define BRW_SCRATCH_SPACE_SIZE_32K    5
#define BRW_SCRATCH_SPACE_SIZE_64K    6
#define BRW_SCRATCH_SPACE_SIZE_128K   7
#define BRW_SCRATCH_SPACE_SIZE_256K   8
#define BRW_SCRATCH_SPACE_SIZE_512K   9
#define BRW_SCRATCH_SPACE_SIZE_1M     10
#define BRW_SCRATCH_SPACE_SIZE_2M     11

/* Evaluates to 32; presumably bytes per register (8 dwords) -- confirm. */
#define REG_SIZE (8*4)
521
522 struct brw_instruction {
523 struct {
524 unsigned opcode:7;
525 unsigned pad:1;
526 unsigned access_mode:1;
527 unsigned mask_control:1;
528 unsigned dependency_control:2;
529 unsigned compression_control:2; /* gen6: quater control */
530 unsigned thread_control:2;
531 unsigned predicate_control:4;
532 unsigned predicate_inverse:1;
533 unsigned execution_size:3;
534 /**
535 * Conditional Modifier for most instructions. On Gen6+, this is also
536 * used for the SEND instruction's Message Target/SFID.
537 */
538 unsigned destreg__conditionalmod:4;
539 unsigned acc_wr_control:1;
540 unsigned cmpt_control:1;
541 unsigned debug_control:1;
542 unsigned saturate:1;
543 } header;
544
545 union {
546 struct {
547 unsigned dest_reg_file:2;
548 unsigned dest_reg_type:3;
549 unsigned src0_reg_file:2;
550 unsigned src0_reg_type:3;
551 unsigned src1_reg_file:2;
552 unsigned src1_reg_type:3;
553 unsigned pad:1;
554 unsigned dest_subreg_nr:5;
555 unsigned dest_reg_nr:8;
556 unsigned dest_horiz_stride:2;
557 unsigned dest_address_mode:1;
558 } da1;
559
560 struct {
561 unsigned dest_reg_file:2;
562 unsigned dest_reg_type:3;
563 unsigned src0_reg_file:2;
564 unsigned src0_reg_type:3;
565 unsigned src1_reg_file:2; /* 0x00000c00 */
566 unsigned src1_reg_type:3; /* 0x00007000 */
567 unsigned pad:1;
568 int dest_indirect_offset:10; /* offset against the deref'd address reg */
569 unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */
570 unsigned dest_horiz_stride:2;
571 unsigned dest_address_mode:1;
572 } ia1;
573
574 struct {
575 unsigned dest_reg_file:2;
576 unsigned dest_reg_type:3;
577 unsigned src0_reg_file:2;
578 unsigned src0_reg_type:3;
579 unsigned src1_reg_file:2;
580 unsigned src1_reg_type:3;
581 unsigned pad:1;
582 unsigned dest_writemask:4;
583 unsigned dest_subreg_nr:1;
584 unsigned dest_reg_nr:8;
585 unsigned dest_horiz_stride:2;
586 unsigned dest_address_mode:1;
587 } da16;
588
589 struct {
590 unsigned dest_reg_file:2;
591 unsigned dest_reg_type:3;
592 unsigned src0_reg_file:2;
593 unsigned src0_reg_type:3;
594 unsigned pad0:6;
595 unsigned dest_writemask:4;
596 int dest_indirect_offset:6;
597 unsigned dest_subreg_nr:3;
598 unsigned dest_horiz_stride:2;
599 unsigned dest_address_mode:1;
600 } ia16;
601
602 struct {
603 unsigned dest_reg_file:2;
604 unsigned dest_reg_type:3;
605 unsigned src0_reg_file:2;
606 unsigned src0_reg_type:3;
607 unsigned src1_reg_file:2;
608 unsigned src1_reg_type:3;
609 unsigned pad:1;
610
611 int jump_count:16;
612 } branch_gen6;
613
614 struct {
615 unsigned dest_reg_file:1;
616 unsigned flag_subreg_num:1;
617 unsigned pad0:2;
618 unsigned src0_abs:1;
619 unsigned src0_negate:1;
620 unsigned src1_abs:1;
621 unsigned src1_negate:1;
622 unsigned src2_abs:1;
623 unsigned src2_negate:1;
624 unsigned pad1:7;
625 unsigned dest_writemask:4;
626 unsigned dest_subreg_nr:3;
627 unsigned dest_reg_nr:8;
628 } da3src;
629 } bits1;
630
631
632 union {
633 struct {
634 unsigned src0_subreg_nr:5;
635 unsigned src0_reg_nr:8;
636 unsigned src0_abs:1;
637 unsigned src0_negate:1;
638 unsigned src0_address_mode:1;
639 unsigned src0_horiz_stride:2;
640 unsigned src0_width:3;
641 unsigned src0_vert_stride:4;
642 unsigned flag_subreg_nr:1;
643 unsigned flag_reg_nr:1;
644 unsigned pad:5;
645 } da1;
646
647 struct {
648 int src0_indirect_offset:10;
649 unsigned src0_subreg_nr:3;
650 unsigned src0_abs:1;
651 unsigned src0_negate:1;
652 unsigned src0_address_mode:1;
653 unsigned src0_horiz_stride:2;
654 unsigned src0_width:3;
655 unsigned src0_vert_stride:4;
656 unsigned flag_subreg_nr:1;
657 unsigned flag_reg_nr:1;
658 unsigned pad:5;
659 } ia1;
660
661 struct {
662 unsigned src0_swz_x:2;
663 unsigned src0_swz_y:2;
664 unsigned src0_subreg_nr:1;
665 unsigned src0_reg_nr:8;
666 unsigned src0_abs:1;
667 unsigned src0_negate:1;
668 unsigned src0_address_mode:1;
669 unsigned src0_swz_z:2;
670 unsigned src0_swz_w:2;
671 unsigned pad0:1;
672 unsigned src0_vert_stride:4;
673 unsigned flag_subreg_nr:1;
674 unsigned flag_reg_nr:1;
675 unsigned pad1:5;
676 } da16;
677
678 struct {
679 unsigned src0_swz_x:2;
680 unsigned src0_swz_y:2;
681 int src0_indirect_offset:6;
682 unsigned src0_subreg_nr:3;
683 unsigned src0_abs:1;
684 unsigned src0_negate:1;
685 unsigned src0_address_mode:1;
686 unsigned src0_swz_z:2;
687 unsigned src0_swz_w:2;
688 unsigned pad0:1;
689 unsigned src0_vert_stride:4;
690 unsigned flag_subreg_nr:1;
691 unsigned flag_reg_nr:1;
692 unsigned pad1:5;
693 } ia16;
694
695 /* Extended Message Descriptor for Ironlake (Gen5) SEND instruction.
696 *
697 * Does not apply to Gen6+. The SFID/message target moved to bits
698 * 27:24 of the header (destreg__conditionalmod); EOT is in bits3.
699 */
700 struct {
701 unsigned pad:26;
702 unsigned end_of_thread:1;
703 unsigned pad1:1;
704 unsigned sfid:4;
705 } send_gen5; /* for Ironlake only */
706
707 struct {
708 unsigned src0_rep_ctrl:1;
709 unsigned src0_swizzle:8;
710 unsigned src0_subreg_nr:3;
711 unsigned src0_reg_nr:8;
712 unsigned pad0:1;
713 unsigned src1_rep_ctrl:1;
714 unsigned src1_swizzle:8;
715 unsigned src1_subreg_nr_low:2;
716 } da3src;
717 } bits2;
718
719 union {
720 struct {
721 unsigned src1_subreg_nr:5;
722 unsigned src1_reg_nr:8;
723 unsigned src1_abs:1;
724 unsigned src1_negate:1;
725 unsigned src1_address_mode:1;
726 unsigned src1_horiz_stride:2;
727 unsigned src1_width:3;
728 unsigned src1_vert_stride:4;
729 unsigned pad0:7;
730 } da1;
731
732 struct {
733 unsigned src1_swz_x:2;
734 unsigned src1_swz_y:2;
735 unsigned src1_subreg_nr:1;
736 unsigned src1_reg_nr:8;
737 unsigned src1_abs:1;
738 unsigned src1_negate:1;
739 unsigned src1_address_mode:1;
740 unsigned src1_swz_z:2;
741 unsigned src1_swz_w:2;
742 unsigned pad1:1;
743 unsigned src1_vert_stride:4;
744 unsigned pad2:7;
745 } da16;
746
747 struct {
748 int src1_indirect_offset:10;
749 unsigned src1_subreg_nr:3;
750 unsigned src1_abs:1;
751 unsigned src1_negate:1;
752 unsigned src1_address_mode:1;
753 unsigned src1_horiz_stride:2;
754 unsigned src1_width:3;
755 unsigned src1_vert_stride:4;
756 unsigned flag_subreg_nr:1;
757 unsigned flag_reg_nr:1;
758 unsigned pad1:5;
759 } ia1;
760
761 struct {
762 unsigned src1_swz_x:2;
763 unsigned src1_swz_y:2;
764 int src1_indirect_offset:6;
765 unsigned src1_subreg_nr:3;
766 unsigned src1_abs:1;
767 unsigned src1_negate:1;
768 unsigned pad0:1;
769 unsigned src1_swz_z:2;
770 unsigned src1_swz_w:2;
771 unsigned pad1:1;
772 unsigned src1_vert_stride:4;
773 unsigned flag_subreg_nr:1;
774 unsigned flag_reg_nr:1;
775 unsigned pad2:5;
776 } ia16;
777
778 struct {
779 int jump_count:16; /* note: signed */
780 unsigned pop_count:4;
781 unsigned pad0:12;
782 } if_else;
783
784 /* This is also used for gen7 IF/ELSE instructions */
785 struct {
786 /* Signed jump distance to the ip to jump to if all channels
787 * are disabled after the break or continue. It should point
788 * to the end of the innermost control flow block, as that's
789 * where some channel could get re-enabled.
790 */
791 int jip:16;
792
793 /* Signed jump distance to the location to resume execution
794 * of this channel if it's enabled for the break or continue.
795 */
796 int uip:16;
797 } break_cont;
798
799 /**
800 * \defgroup SEND instructions / Message Descriptors
801 *
802 * @{
803 */
804
805 /**
806 * Generic Message Descriptor for Gen4 SEND instructions. The structs
807 * below expand function_control to something specific for their
808 * message. Due to struct packing issues, they duplicate these bits.
809 *
810 * See the G45 PRM, Volume 4, Table 14-15.
811 */
812 struct {
813 unsigned function_control:16;
814 unsigned response_length:4;
815 unsigned msg_length:4;
816 unsigned msg_target:4;
817 unsigned pad1:3;
818 unsigned end_of_thread:1;
819 } generic;
820
821 /**
822 * Generic Message Descriptor for Gen5-7 SEND instructions.
823 *
824 * See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most
825 * of the information on the SEND instruction is missing from the public
826 * Ironlake PRM.)
827 *
828 * The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies.
829 * According to the SEND instruction description:
830 * "The MSb of the message description, the EOT field, always comes from
831 * bit 127 of the instruction word"...which is bit 31 of this field.
832 */
833 struct {
834 unsigned function_control:19;
835 unsigned header_present:1;
836 unsigned response_length:5;
837 unsigned msg_length:4;
838 unsigned pad1:2;
839 unsigned end_of_thread:1;
840 } generic_gen5;
841
842 /** G45 PRM, Volume 4, Section 6.1.1.1 */
843 struct {
844 unsigned function:4;
845 unsigned int_type:1;
846 unsigned precision:1;
847 unsigned saturate:1;
848 unsigned data_type:1;
849 unsigned pad0:8;
850 unsigned response_length:4;
851 unsigned msg_length:4;
852 unsigned msg_target:4;
853 unsigned pad1:3;
854 unsigned end_of_thread:1;
855 } math;
856
857 /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
858 struct {
859 unsigned function:4;
860 unsigned int_type:1;
861 unsigned precision:1;
862 unsigned saturate:1;
863 unsigned data_type:1;
864 unsigned snapshot:1;
865 unsigned pad0:10;
866 unsigned header_present:1;
867 unsigned response_length:5;
868 unsigned msg_length:4;
869 unsigned pad1:2;
870 unsigned end_of_thread:1;
871 } math_gen5;
872
873 /** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */
874 struct {
875 unsigned binding_table_index:8;
876 unsigned sampler:4;
877 unsigned return_format:2;
878 unsigned msg_type:2;
879 unsigned response_length:4;
880 unsigned msg_length:4;
881 unsigned msg_target:4;
882 unsigned pad1:3;
883 unsigned end_of_thread:1;
884 } sampler;
885
886 /** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */
887 struct {
888 unsigned binding_table_index:8;
889 unsigned sampler:4;
890 unsigned msg_type:4;
891 unsigned response_length:4;
892 unsigned msg_length:4;
893 unsigned msg_target:4;
894 unsigned pad1:3;
895 unsigned end_of_thread:1;
896 } sampler_g4x;
897
898 /** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */
899 struct {
900 unsigned binding_table_index:8;
901 unsigned sampler:4;
902 unsigned msg_type:4;
903 unsigned simd_mode:2;
904 unsigned pad0:1;
905 unsigned header_present:1;
906 unsigned response_length:5;
907 unsigned msg_length:4;
908 unsigned pad1:2;
909 unsigned end_of_thread:1;
910 } sampler_gen5;
911
912 struct {
913 unsigned binding_table_index:8;
914 unsigned sampler:4;
915 unsigned msg_type:5;
916 unsigned simd_mode:2;
917 unsigned header_present:1;
918 unsigned response_length:5;
919 unsigned msg_length:4;
920 unsigned pad1:2;
921 unsigned end_of_thread:1;
922 } sampler_gen7;
923
924 struct brw_urb_immediate {
925 unsigned opcode:4;
926 unsigned offset:6;
927 unsigned swizzle_control:2;
928 unsigned pad:1;
929 unsigned allocate:1;
930 unsigned used:1;
931 unsigned complete:1;
932 unsigned response_length:4;
933 unsigned msg_length:4;
934 unsigned msg_target:4;
935 unsigned pad1:3;
936 unsigned end_of_thread:1;
937 } urb;
938
939 struct {
940 unsigned opcode:4;
941 unsigned offset:6;
942 unsigned swizzle_control:2;
943 unsigned pad:1;
944 unsigned allocate:1;
945 unsigned used:1;
946 unsigned complete:1;
947 unsigned pad0:3;
948 unsigned header_present:1;
949 unsigned response_length:5;
950 unsigned msg_length:4;
951 unsigned pad1:2;
952 unsigned end_of_thread:1;
953 } urb_gen5;
954
955 struct {
956 unsigned opcode:3;
957 unsigned offset:11;
958 unsigned swizzle_control:1;
959 unsigned complete:1;
960 unsigned per_slot_offset:1;
961 unsigned pad0:2;
962 unsigned header_present:1;
963 unsigned response_length:5;
964 unsigned msg_length:4;
965 unsigned pad1:2;
966 unsigned end_of_thread:1;
967 } urb_gen7;
968
969 /** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */
970 struct {
971 unsigned binding_table_index:8;
972 unsigned msg_control:4;
973 unsigned msg_type:2;
974 unsigned target_cache:2;
975 unsigned response_length:4;
976 unsigned msg_length:4;
977 unsigned msg_target:4;
978 unsigned pad1:3;
979 unsigned end_of_thread:1;
980 } dp_read;
981
982 /** G45 PRM, Volume 4, Section 5.10.1.1.2 */
983 struct {
984 unsigned binding_table_index:8;
985 unsigned msg_control:3;
986 unsigned msg_type:3;
987 unsigned target_cache:2;
988 unsigned response_length:4;
989 unsigned msg_length:4;
990 unsigned msg_target:4;
991 unsigned pad1:3;
992 unsigned end_of_thread:1;
993 } dp_read_g4x;
994
995 /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
996 struct {
997 unsigned binding_table_index:8;
998 unsigned msg_control:3;
999 unsigned msg_type:3;
1000 unsigned target_cache:2;
1001 unsigned pad0:3;
1002 unsigned header_present:1;
1003 unsigned response_length:5;
1004 unsigned msg_length:4;
1005 unsigned pad1:2;
1006 unsigned end_of_thread:1;
1007 } dp_read_gen5;
1008
1009 /** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */
1010 struct {
1011 unsigned binding_table_index:8;
1012 unsigned msg_control:3;
1013 unsigned last_render_target:1;
1014 unsigned msg_type:3;
1015 unsigned send_commit_msg:1;
1016 unsigned response_length:4;
1017 unsigned msg_length:4;
1018 unsigned msg_target:4;
1019 unsigned pad1:3;
1020 unsigned end_of_thread:1;
1021 } dp_write;
1022
1023 /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
1024 struct {
1025 unsigned binding_table_index:8;
1026 unsigned msg_control:3;
1027 unsigned last_render_target:1;
1028 unsigned msg_type:3;
1029 unsigned send_commit_msg:1;
1030 unsigned pad0:3;
1031 unsigned header_present:1;
1032 unsigned response_length:5;
1033 unsigned msg_length:4;
1034 unsigned pad1:2;
1035 unsigned end_of_thread:1;
1036 } dp_write_gen5;
1037
1038 /**
1039 * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
1040 *
1041 * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
1042 **/
1043 struct {
1044 unsigned binding_table_index:8;
1045 unsigned msg_control:5;
1046 unsigned msg_type:3;
1047 unsigned pad0:3;
1048 unsigned header_present:1;
1049 unsigned response_length:5;
1050 unsigned msg_length:4;
1051 unsigned pad1:2;
1052 unsigned end_of_thread:1;
1053 } gen6_dp_sampler_const_cache;
1054
1055 /**
1056 * Message for the Sandybridge Render Cache Data Port.
1057 *
1058 * Most fields are defined in the Sandybridge PRM, Volume 4 Part 1,
1059 * Section 3.9.2.1.1: Message Descriptor.
1060 *
1061 * "Slot Group Select" and "Last Render Target" are part of the
1062 * 5-bit message control for Render Target Write messages. See
1063 * Section 3.9.9.2.1 of the same volume.
1064 */
1065 struct {
1066 unsigned binding_table_index:8;
1067 unsigned msg_control:3;
1068 unsigned slot_group_select:1;
1069 unsigned last_render_target:1;
1070 unsigned msg_type:4;
1071 unsigned send_commit_msg:1;
1072 unsigned pad0:1;
1073 unsigned header_present:1;
1074 unsigned response_length:5;
1075 unsigned msg_length:4;
1076 unsigned pad1:2;
1077 unsigned end_of_thread:1;
1078 } gen6_dp;
1079
1080 /**
1081 * Message for any of the Gen7 Data Port caches.
1082 *
1083 * Most fields are defined in BSpec volume 5c.2 Data Port / Messages /
1084 * Data Port Messages / Message Descriptor. Once again, "Slot Group
1085 * Select" and "Last Render Target" are part of the 6-bit message
1086 * control for Render Target Writes.
1087 */
1088 struct {
1089 unsigned binding_table_index:8;
1090 unsigned msg_control:3;
1091 unsigned slot_group_select:1;
1092 unsigned last_render_target:1;
1093 unsigned msg_control_pad:1;
1094 unsigned msg_type:4;
1095 unsigned pad1:1;
1096 unsigned header_present:1;
1097 unsigned response_length:5;
1098 unsigned msg_length:4;
1099 unsigned pad2:2;
1100 unsigned end_of_thread:1;
1101 } gen7_dp;
1102 /** @} */
1103
1104 struct {
1105 unsigned src1_subreg_nr_high:1;
1106 unsigned src1_reg_nr:8;
1107 unsigned pad0:1;
1108 unsigned src2_rep_ctrl:1;
1109 unsigned src2_swizzle:8;
1110 unsigned src2_subreg_nr:3;
1111 unsigned src2_reg_nr:8;
1112 unsigned pad1:2;
1113 } da3src;
1114
1115 int d;
1116 unsigned ud;
1117 float f;
1118 } bits3;
1119 };
1120
1121
/* These aren't hardware structs, just something useful for us to pass around:
 *
 * Align1 operation has a lot of control over input ranges.  Used in
 * WM programs to implement shaders decomposed into "channel serial"
 * or "structure of array" form:
 *
 * The leading bitfields describe the register region; dw1 carries either
 * the align16 swizzle/writemask plus the indirect offset, or the raw
 * 32-bit payload of an immediate operand (see the brw_imm_*() helpers).
 */
struct brw_reg {
	unsigned type:4;		/* one of BRW_REGISTER_TYPE_x */
	unsigned file:2;		/* one of BRW_x_REGISTER_FILE */
	unsigned nr:8;			/* register number/index */
	unsigned subnr:5;		/* byte offset inside the register; :1 in align16 */
	unsigned negate:1;		/* source only */
	unsigned abs:1;			/* source only */
	unsigned vstride:4;		/* source only, one of BRW_VERTICAL_STRIDE_x */
	unsigned width:3;		/* src only, align1 only, one of BRW_WIDTH_x */
	unsigned hstride:2;		/* align1 only, one of BRW_HORIZONTAL_STRIDE_x */
	unsigned address_mode:1;	/* relative addressing, hopefully! */
	unsigned pad0:1;		/* unused, kept zero by brw_reg() */

	union {
		struct {
			unsigned swizzle:8;		/* src only, align16 only */
			unsigned writemask:4;		/* dest only, align16 only */
			int indirect_offset:10;		/* relative addressing offset */
			unsigned pad1:10;		/* two dwords total */
		} bits;

		/* Views of the same dword used for immediate values. */
		float f;
		int d;
		unsigned ud;
	} dw1;
};
1154
/*
 * Software-side "pointer" for register-indirect addressing: names an
 * address register sub-number plus a signed offset.  The deref_*() helpers
 * turn it into a brw_reg; get_addr_reg() returns the address register itself.
 */
struct brw_indirect {
	unsigned addr_subnr:4;	/* sub-register of the address register */
	int addr_offset:10;	/* signed offset, same width as brw_reg's indirect_offset */
	unsigned pad:18;	/* unused, zeroed by brw_indirect() */
};
1160
/* Depth of the push/pop instruction-state stack in struct brw_compile. */
#define BRW_EU_MAX_INSN_STACK 5
/* Upper bound on emitted instructions, asserted by brw_next_insn(). */
#define BRW_EU_MAX_INSN 10000

/*
 * State of one in-progress EU program: the output instruction store plus
 * the default instruction state that each newly emitted instruction copies.
 */
struct brw_compile {
	struct brw_instruction *store;	/* output buffer, indexed by nr_insn */
	unsigned nr_insn;		/* number of instructions emitted so far */

	int gen;	/* hardware generation; compared against octal literals such as 060 */

	/* Allow clients to push/pop instruction state:
	 */
	struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
	bool compressed_stack[BRW_EU_MAX_INSN_STACK];
	struct brw_instruction *current;	/* template copied into every new instruction */

	unsigned flag_value;
	bool single_program_flow;
	bool compressed;

	/* Control flow stacks:
	 * - if_stack contains IF and ELSE instructions which must be patched
	 *   (and popped) once the matching ENDIF instruction is encountered.
	 */
	struct brw_instruction **if_stack;
	int if_stack_depth;
	int if_stack_array_size;
};
1188
type_sz(unsigned type)1189 static inline int type_sz(unsigned type)
1190 {
1191 switch (type) {
1192 case BRW_REGISTER_TYPE_UD:
1193 case BRW_REGISTER_TYPE_D:
1194 case BRW_REGISTER_TYPE_F:
1195 return 4;
1196 case BRW_REGISTER_TYPE_HF:
1197 case BRW_REGISTER_TYPE_UW:
1198 case BRW_REGISTER_TYPE_W:
1199 return 2;
1200 case BRW_REGISTER_TYPE_UB:
1201 case BRW_REGISTER_TYPE_B:
1202 return 1;
1203 default:
1204 return 0;
1205 }
1206 }
1207
1208 /**
1209 * Construct a brw_reg.
1210 * \param file one of the BRW_x_REGISTER_FILE values
1211 * \param nr register number/index
1212 * \param subnr register sub number
1213 * \param type one of BRW_REGISTER_TYPE_x
1214 * \param vstride one of BRW_VERTICAL_STRIDE_x
1215 * \param width one of BRW_WIDTH_x
1216 * \param hstride one of BRW_HORIZONTAL_STRIDE_x
1217 * \param swizzle one of BRW_SWIZZLE_x
1218 * \param writemask WRITEMASK_X/Y/Z/W bitfield
1219 */
brw_reg(unsigned file,unsigned nr,unsigned subnr,unsigned type,unsigned vstride,unsigned width,unsigned hstride,unsigned swizzle,unsigned writemask)1220 static inline struct brw_reg brw_reg(unsigned file,
1221 unsigned nr,
1222 unsigned subnr,
1223 unsigned type,
1224 unsigned vstride,
1225 unsigned width,
1226 unsigned hstride,
1227 unsigned swizzle,
1228 unsigned writemask)
1229 {
1230 struct brw_reg reg;
1231 if (file == BRW_GENERAL_REGISTER_FILE)
1232 assert(nr < BRW_MAX_GRF);
1233 else if (file == BRW_MESSAGE_REGISTER_FILE)
1234 assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
1235 else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
1236 assert(nr <= BRW_ARF_IP);
1237
1238 reg.type = type;
1239 reg.file = file;
1240 reg.nr = nr;
1241 reg.subnr = subnr * type_sz(type);
1242 reg.negate = 0;
1243 reg.abs = 0;
1244 reg.vstride = vstride;
1245 reg.width = width;
1246 reg.hstride = hstride;
1247 reg.address_mode = BRW_ADDRESS_DIRECT;
1248 reg.pad0 = 0;
1249
1250 /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
1251 * set swizzle and writemask to W, as the lower bits of subnr will
1252 * be lost when converted to align16. This is probably too much to
1253 * keep track of as you'd want it adjusted by suboffset(), etc.
1254 * Perhaps fix up when converting to align16?
1255 */
1256 reg.dw1.bits.swizzle = swizzle;
1257 reg.dw1.bits.writemask = writemask;
1258 reg.dw1.bits.indirect_offset = 0;
1259 reg.dw1.bits.pad1 = 0;
1260 return reg;
1261 }
1262
1263 /** Construct float[16] register */
brw_vec16_reg(unsigned file,unsigned nr,unsigned subnr)1264 static inline struct brw_reg brw_vec16_reg(unsigned file,
1265 unsigned nr,
1266 unsigned subnr)
1267 {
1268 return brw_reg(file,
1269 nr,
1270 subnr,
1271 BRW_REGISTER_TYPE_F,
1272 BRW_VERTICAL_STRIDE_16,
1273 BRW_WIDTH_16,
1274 BRW_HORIZONTAL_STRIDE_1,
1275 BRW_SWIZZLE_XYZW,
1276 WRITEMASK_XYZW);
1277 }
1278
1279 /** Construct float[8] register */
brw_vec8_reg(unsigned file,unsigned nr,unsigned subnr)1280 static inline struct brw_reg brw_vec8_reg(unsigned file,
1281 unsigned nr,
1282 unsigned subnr)
1283 {
1284 return brw_reg(file,
1285 nr,
1286 subnr,
1287 BRW_REGISTER_TYPE_F,
1288 BRW_VERTICAL_STRIDE_8,
1289 BRW_WIDTH_8,
1290 BRW_HORIZONTAL_STRIDE_1,
1291 BRW_SWIZZLE_XYZW,
1292 WRITEMASK_XYZW);
1293 }
1294
1295 /** Construct float[4] register */
brw_vec4_reg(unsigned file,unsigned nr,unsigned subnr)1296 static inline struct brw_reg brw_vec4_reg(unsigned file,
1297 unsigned nr,
1298 unsigned subnr)
1299 {
1300 return brw_reg(file,
1301 nr,
1302 subnr,
1303 BRW_REGISTER_TYPE_F,
1304 BRW_VERTICAL_STRIDE_4,
1305 BRW_WIDTH_4,
1306 BRW_HORIZONTAL_STRIDE_1,
1307 BRW_SWIZZLE_XYZW,
1308 WRITEMASK_XYZW);
1309 }
1310
1311 /** Construct float[2] register */
brw_vec2_reg(unsigned file,unsigned nr,unsigned subnr)1312 static inline struct brw_reg brw_vec2_reg(unsigned file,
1313 unsigned nr,
1314 unsigned subnr)
1315 {
1316 return brw_reg(file,
1317 nr,
1318 subnr,
1319 BRW_REGISTER_TYPE_F,
1320 BRW_VERTICAL_STRIDE_2,
1321 BRW_WIDTH_2,
1322 BRW_HORIZONTAL_STRIDE_1,
1323 BRW_SWIZZLE_XYXY,
1324 WRITEMASK_XY);
1325 }
1326
1327 /** Construct float[1] register */
brw_vec1_reg(unsigned file,unsigned nr,unsigned subnr)1328 static inline struct brw_reg brw_vec1_reg(unsigned file,
1329 unsigned nr,
1330 unsigned subnr)
1331 {
1332 return brw_reg(file,
1333 nr,
1334 subnr,
1335 BRW_REGISTER_TYPE_F,
1336 BRW_VERTICAL_STRIDE_0,
1337 BRW_WIDTH_1,
1338 BRW_HORIZONTAL_STRIDE_0,
1339 BRW_SWIZZLE_XXXX,
1340 WRITEMASK_X);
1341 }
1342
1343
__retype(struct brw_reg reg,unsigned type)1344 static inline struct brw_reg __retype(struct brw_reg reg,
1345 unsigned type)
1346 {
1347 reg.type = type;
1348 return reg;
1349 }
1350
__retype_d(struct brw_reg reg)1351 static inline struct brw_reg __retype_d(struct brw_reg reg)
1352 {
1353 return __retype(reg, BRW_REGISTER_TYPE_D);
1354 }
1355
__retype_ud(struct brw_reg reg)1356 static inline struct brw_reg __retype_ud(struct brw_reg reg)
1357 {
1358 return __retype(reg, BRW_REGISTER_TYPE_UD);
1359 }
1360
__retype_uw(struct brw_reg reg)1361 static inline struct brw_reg __retype_uw(struct brw_reg reg)
1362 {
1363 return __retype(reg, BRW_REGISTER_TYPE_UW);
1364 }
1365
__sechalf(struct brw_reg reg)1366 static inline struct brw_reg __sechalf(struct brw_reg reg)
1367 {
1368 if (reg.vstride)
1369 reg.nr++;
1370 return reg;
1371 }
1372
__suboffset(struct brw_reg reg,unsigned delta)1373 static inline struct brw_reg __suboffset(struct brw_reg reg,
1374 unsigned delta)
1375 {
1376 reg.subnr += delta * type_sz(reg.type);
1377 return reg;
1378 }
1379
__offset(struct brw_reg reg,unsigned delta)1380 static inline struct brw_reg __offset(struct brw_reg reg,
1381 unsigned delta)
1382 {
1383 reg.nr += delta;
1384 return reg;
1385 }
1386
byte_offset(struct brw_reg reg,unsigned bytes)1387 static inline struct brw_reg byte_offset(struct brw_reg reg,
1388 unsigned bytes)
1389 {
1390 unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
1391 reg.nr = newoffset / REG_SIZE;
1392 reg.subnr = newoffset % REG_SIZE;
1393 return reg;
1394 }
1395
1396
1397 /** Construct unsigned word[16] register */
brw_uw16_reg(unsigned file,unsigned nr,unsigned subnr)1398 static inline struct brw_reg brw_uw16_reg(unsigned file,
1399 unsigned nr,
1400 unsigned subnr)
1401 {
1402 return __suboffset(__retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
1403 }
1404
1405 /** Construct unsigned word[8] register */
brw_uw8_reg(unsigned file,unsigned nr,unsigned subnr)1406 static inline struct brw_reg brw_uw8_reg(unsigned file,
1407 unsigned nr,
1408 unsigned subnr)
1409 {
1410 return __suboffset(__retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
1411 }
1412
1413 /** Construct unsigned word[1] register */
brw_uw1_reg(unsigned file,unsigned nr,unsigned subnr)1414 static inline struct brw_reg brw_uw1_reg(unsigned file,
1415 unsigned nr,
1416 unsigned subnr)
1417 {
1418 return __suboffset(__retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
1419 }
1420
brw_imm_reg(unsigned type)1421 static inline struct brw_reg brw_imm_reg(unsigned type)
1422 {
1423 return brw_reg( BRW_IMMEDIATE_VALUE,
1424 0,
1425 0,
1426 type,
1427 BRW_VERTICAL_STRIDE_0,
1428 BRW_WIDTH_1,
1429 BRW_HORIZONTAL_STRIDE_0,
1430 0,
1431 0);
1432 }
1433
1434 /** Construct float immediate register */
brw_imm_f(float f)1435 static inline struct brw_reg brw_imm_f(float f)
1436 {
1437 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
1438 imm.dw1.f = f;
1439 return imm;
1440 }
1441
1442 /** Construct integer immediate register */
brw_imm_d(int d)1443 static inline struct brw_reg brw_imm_d(int d)
1444 {
1445 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
1446 imm.dw1.d = d;
1447 return imm;
1448 }
1449
1450 /** Construct uint immediate register */
brw_imm_ud(unsigned ud)1451 static inline struct brw_reg brw_imm_ud(unsigned ud)
1452 {
1453 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
1454 imm.dw1.ud = ud;
1455 return imm;
1456 }
1457
1458 /** Construct ushort immediate register */
brw_imm_uw(uint16_t uw)1459 static inline struct brw_reg brw_imm_uw(uint16_t uw)
1460 {
1461 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
1462 imm.dw1.ud = uw | (uw << 16);
1463 return imm;
1464 }
1465
1466 /** Construct short immediate register */
brw_imm_w(int16_t w)1467 static inline struct brw_reg brw_imm_w(int16_t w)
1468 {
1469 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
1470 imm.dw1.d = w | (w << 16);
1471 return imm;
1472 }
1473
1474 /* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
1475 * numbers alias with _V and _VF below:
1476 */
1477
1478 /** Construct vector of eight signed half-byte values */
brw_imm_v(unsigned v)1479 static inline struct brw_reg brw_imm_v(unsigned v)
1480 {
1481 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
1482 imm.vstride = BRW_VERTICAL_STRIDE_0;
1483 imm.width = BRW_WIDTH_8;
1484 imm.hstride = BRW_HORIZONTAL_STRIDE_1;
1485 imm.dw1.ud = v;
1486 return imm;
1487 }
1488
1489 /** Construct vector of four 8-bit float values */
brw_imm_vf(unsigned v)1490 static inline struct brw_reg brw_imm_vf(unsigned v)
1491 {
1492 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
1493 imm.vstride = BRW_VERTICAL_STRIDE_0;
1494 imm.width = BRW_WIDTH_4;
1495 imm.hstride = BRW_HORIZONTAL_STRIDE_1;
1496 imm.dw1.ud = v;
1497 return imm;
1498 }
1499
/* Byte-sized building blocks for the packed 8-bit float (VF) immediates
 * taken by brw_imm_vf4().  NOTE(review): going by the names, 0x30 encodes
 * 1.0 and bit 7 is the sign bit - confirm against the hardware docs.
 */
#define VF_ZERO 0x0
#define VF_ONE 0x30
#define VF_NEG (1<<7)
1503
brw_imm_vf4(unsigned v0,unsigned v1,unsigned v2,unsigned v3)1504 static inline struct brw_reg brw_imm_vf4(unsigned v0,
1505 unsigned v1,
1506 unsigned v2,
1507 unsigned v3)
1508 {
1509 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
1510 imm.vstride = BRW_VERTICAL_STRIDE_0;
1511 imm.width = BRW_WIDTH_4;
1512 imm.hstride = BRW_HORIZONTAL_STRIDE_1;
1513 imm.dw1.ud = ((v0 << 0) |
1514 (v1 << 8) |
1515 (v2 << 16) |
1516 (v3 << 24));
1517 return imm;
1518 }
1519
brw_address(struct brw_reg reg)1520 static inline struct brw_reg brw_address(struct brw_reg reg)
1521 {
1522 return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
1523 }
1524
1525 /** Construct float[1] general-purpose register */
brw_vec1_grf(unsigned nr,unsigned subnr)1526 static inline struct brw_reg brw_vec1_grf(unsigned nr, unsigned subnr)
1527 {
1528 return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1529 }
1530
1531 /** Construct float[2] general-purpose register */
brw_vec2_grf(unsigned nr,unsigned subnr)1532 static inline struct brw_reg brw_vec2_grf(unsigned nr, unsigned subnr)
1533 {
1534 return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1535 }
1536
1537 /** Construct float[4] general-purpose register */
brw_vec4_grf(unsigned nr,unsigned subnr)1538 static inline struct brw_reg brw_vec4_grf(unsigned nr, unsigned subnr)
1539 {
1540 return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1541 }
1542
1543 /** Construct float[8] general-purpose register */
brw_vec8_grf(unsigned nr,unsigned subnr)1544 static inline struct brw_reg brw_vec8_grf(unsigned nr, unsigned subnr)
1545 {
1546 return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1547 }
1548
brw_uw8_grf(unsigned nr,unsigned subnr)1549 static inline struct brw_reg brw_uw8_grf(unsigned nr, unsigned subnr)
1550 {
1551 return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1552 }
1553
brw_uw16_grf(unsigned nr,unsigned subnr)1554 static inline struct brw_reg brw_uw16_grf(unsigned nr, unsigned subnr)
1555 {
1556 return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1557 }
1558
1559 /** Construct null register (usually used for setting condition codes) */
brw_null_reg(void)1560 static inline struct brw_reg brw_null_reg(void)
1561 {
1562 return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1563 BRW_ARF_NULL,
1564 0);
1565 }
1566
brw_address_reg(unsigned subnr)1567 static inline struct brw_reg brw_address_reg(unsigned subnr)
1568 {
1569 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1570 BRW_ARF_ADDRESS,
1571 subnr);
1572 }
1573
1574 /* If/else instructions break in align16 mode if writemask & swizzle
1575 * aren't xyzw. This goes against the convention for other scalar
1576 * regs:
1577 */
brw_ip_reg(void)1578 static inline struct brw_reg brw_ip_reg(void)
1579 {
1580 return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1581 BRW_ARF_IP,
1582 0,
1583 BRW_REGISTER_TYPE_UD,
1584 BRW_VERTICAL_STRIDE_4, /* ? */
1585 BRW_WIDTH_1,
1586 BRW_HORIZONTAL_STRIDE_0,
1587 BRW_SWIZZLE_XYZW, /* NOTE! */
1588 WRITEMASK_XYZW); /* NOTE! */
1589 }
1590
brw_acc_reg(void)1591 static inline struct brw_reg brw_acc_reg(void)
1592 {
1593 return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1594 BRW_ARF_ACCUMULATOR,
1595 0);
1596 }
1597
brw_notification_1_reg(void)1598 static inline struct brw_reg brw_notification_1_reg(void)
1599 {
1600 return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1601 BRW_ARF_NOTIFICATION_COUNT,
1602 1,
1603 BRW_REGISTER_TYPE_UD,
1604 BRW_VERTICAL_STRIDE_0,
1605 BRW_WIDTH_1,
1606 BRW_HORIZONTAL_STRIDE_0,
1607 BRW_SWIZZLE_XXXX,
1608 WRITEMASK_X);
1609 }
1610
brw_flag_reg(void)1611 static inline struct brw_reg brw_flag_reg(void)
1612 {
1613 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1614 BRW_ARF_FLAG,
1615 0);
1616 }
1617
brw_mask_reg(unsigned subnr)1618 static inline struct brw_reg brw_mask_reg(unsigned subnr)
1619 {
1620 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1621 BRW_ARF_MASK,
1622 subnr);
1623 }
1624
brw_message_reg(unsigned nr)1625 static inline struct brw_reg brw_message_reg(unsigned nr)
1626 {
1627 assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
1628 return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
1629 }
1630
brw_message4_reg(unsigned nr,int subnr)1631 static inline struct brw_reg brw_message4_reg(unsigned nr,
1632 int subnr)
1633 {
1634 assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
1635 return brw_vec4_reg(BRW_MESSAGE_REGISTER_FILE, nr, subnr);
1636 }
1637
/* Map an element count or stride (0, 1, 2, 4, 8, 16, 32) to the value
 * 0..6 that __stride() stores in the region fields; anything else maps
 * to 0.  This is almost always called with a numeric constant argument,
 * so keep it trivial to evaluate at compile time:
 */
static inline unsigned cvt(unsigned val)
{
	/* 0, 1 and 2 map to themselves. */
	if (val <= 2)
		return val;

	if (val == 4)
		return 3;
	if (val == 8)
		return 4;
	if (val == 16)
		return 5;
	if (val == 32)
		return 6;

	return 0;
}
1654
__stride(struct brw_reg reg,unsigned vstride,unsigned width,unsigned hstride)1655 static inline struct brw_reg __stride(struct brw_reg reg,
1656 unsigned vstride,
1657 unsigned width,
1658 unsigned hstride)
1659 {
1660 reg.vstride = cvt(vstride);
1661 reg.width = cvt(width) - 1;
1662 reg.hstride = cvt(hstride);
1663 return reg;
1664 }
1665
vec16(struct brw_reg reg)1666 static inline struct brw_reg vec16(struct brw_reg reg)
1667 {
1668 return __stride(reg, 16,16,1);
1669 }
1670
vec8(struct brw_reg reg)1671 static inline struct brw_reg vec8(struct brw_reg reg)
1672 {
1673 return __stride(reg, 8,8,1);
1674 }
1675
vec4(struct brw_reg reg)1676 static inline struct brw_reg vec4(struct brw_reg reg)
1677 {
1678 return __stride(reg, 4,4,1);
1679 }
1680
vec2(struct brw_reg reg)1681 static inline struct brw_reg vec2(struct brw_reg reg)
1682 {
1683 return __stride(reg, 2,2,1);
1684 }
1685
vec1(struct brw_reg reg)1686 static inline struct brw_reg vec1(struct brw_reg reg)
1687 {
1688 return __stride(reg, 0,1,0);
1689 }
1690
get_element(struct brw_reg reg,unsigned elt)1691 static inline struct brw_reg get_element(struct brw_reg reg, unsigned elt)
1692 {
1693 return vec1(__suboffset(reg, elt));
1694 }
1695
get_element_ud(struct brw_reg reg,unsigned elt)1696 static inline struct brw_reg get_element_ud(struct brw_reg reg, unsigned elt)
1697 {
1698 return vec1(__suboffset(__retype(reg, BRW_REGISTER_TYPE_UD), elt));
1699 }
1700
brw_swizzle(struct brw_reg reg,unsigned x,unsigned y,unsigned z,unsigned w)1701 static inline struct brw_reg brw_swizzle(struct brw_reg reg,
1702 unsigned x,
1703 unsigned y,
1704 unsigned z,
1705 unsigned w)
1706 {
1707 assert(reg.file != BRW_IMMEDIATE_VALUE);
1708
1709 reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
1710 BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
1711 BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
1712 BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
1713 return reg;
1714 }
1715
brw_swizzle1(struct brw_reg reg,unsigned x)1716 static inline struct brw_reg brw_swizzle1(struct brw_reg reg,
1717 unsigned x)
1718 {
1719 return brw_swizzle(reg, x, x, x, x);
1720 }
1721
brw_writemask(struct brw_reg reg,unsigned mask)1722 static inline struct brw_reg brw_writemask(struct brw_reg reg,
1723 unsigned mask)
1724 {
1725 assert(reg.file != BRW_IMMEDIATE_VALUE);
1726 reg.dw1.bits.writemask &= mask;
1727 return reg;
1728 }
1729
brw_set_writemask(struct brw_reg reg,unsigned mask)1730 static inline struct brw_reg brw_set_writemask(struct brw_reg reg,
1731 unsigned mask)
1732 {
1733 assert(reg.file != BRW_IMMEDIATE_VALUE);
1734 reg.dw1.bits.writemask = mask;
1735 return reg;
1736 }
1737
brw_negate(struct brw_reg reg)1738 static inline struct brw_reg brw_negate(struct brw_reg reg)
1739 {
1740 reg.negate ^= 1;
1741 return reg;
1742 }
1743
brw_abs(struct brw_reg reg)1744 static inline struct brw_reg brw_abs(struct brw_reg reg)
1745 {
1746 reg.abs = 1;
1747 return reg;
1748 }
1749
1750 /***********************************************************************
1751 */
brw_vec4_indirect(unsigned subnr,int offset)1752 static inline struct brw_reg brw_vec4_indirect(unsigned subnr,
1753 int offset)
1754 {
1755 struct brw_reg reg = brw_vec4_grf(0, 0);
1756 reg.subnr = subnr;
1757 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1758 reg.dw1.bits.indirect_offset = offset;
1759 return reg;
1760 }
1761
brw_vec1_indirect(unsigned subnr,int offset)1762 static inline struct brw_reg brw_vec1_indirect(unsigned subnr,
1763 int offset)
1764 {
1765 struct brw_reg reg = brw_vec1_grf(0, 0);
1766 reg.subnr = subnr;
1767 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1768 reg.dw1.bits.indirect_offset = offset;
1769 return reg;
1770 }
1771
deref_4f(struct brw_indirect ptr,int offset)1772 static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset)
1773 {
1774 return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1775 }
1776
deref_1f(struct brw_indirect ptr,int offset)1777 static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset)
1778 {
1779 return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1780 }
1781
deref_4b(struct brw_indirect ptr,int offset)1782 static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset)
1783 {
1784 return __retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
1785 }
1786
deref_1uw(struct brw_indirect ptr,int offset)1787 static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset)
1788 {
1789 return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
1790 }
1791
deref_1d(struct brw_indirect ptr,int offset)1792 static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset)
1793 {
1794 return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
1795 }
1796
deref_1ud(struct brw_indirect ptr,int offset)1797 static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset)
1798 {
1799 return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
1800 }
1801
get_addr_reg(struct brw_indirect ptr)1802 static inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
1803 {
1804 return brw_address_reg(ptr.addr_subnr);
1805 }
1806
brw_indirect_offset(struct brw_indirect ptr,int offset)1807 static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset)
1808 {
1809 ptr.addr_offset += offset;
1810 return ptr;
1811 }
1812
brw_indirect(unsigned addr_subnr,int offset)1813 static inline struct brw_indirect brw_indirect(unsigned addr_subnr, int offset)
1814 {
1815 struct brw_indirect ptr;
1816 ptr.addr_subnr = addr_subnr;
1817 ptr.addr_offset = offset;
1818 ptr.pad = 0;
1819 return ptr;
1820 }
1821
1822 /** Do two brw_regs refer to the same register? */
brw_same_reg(struct brw_reg r1,struct brw_reg r2)1823 static inline bool brw_same_reg(struct brw_reg r1, struct brw_reg r2)
1824 {
1825 return r1.file == r2.file && r1.nr == r2.nr;
1826 }
1827
current_insn(struct brw_compile * p)1828 static inline struct brw_instruction *current_insn( struct brw_compile *p)
1829 {
1830 return &p->store[p->nr_insn];
1831 }
1832
brw_set_predicate_control(struct brw_compile * p,unsigned pc)1833 static inline void brw_set_predicate_control( struct brw_compile *p, unsigned pc )
1834 {
1835 p->current->header.predicate_control = pc;
1836 }
1837
brw_set_predicate_inverse(struct brw_compile * p,bool predicate_inverse)1838 static inline void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse)
1839 {
1840 p->current->header.predicate_inverse = predicate_inverse;
1841 }
1842
brw_set_conditionalmod(struct brw_compile * p,unsigned conditional)1843 static inline void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional )
1844 {
1845 p->current->header.destreg__conditionalmod = conditional;
1846 }
1847
brw_set_access_mode(struct brw_compile * p,unsigned access_mode)1848 static inline void brw_set_access_mode(struct brw_compile *p, unsigned access_mode)
1849 {
1850 p->current->header.access_mode = access_mode;
1851 }
1852
brw_set_mask_control(struct brw_compile * p,unsigned value)1853 static inline void brw_set_mask_control(struct brw_compile *p, unsigned value)
1854 {
1855 p->current->header.mask_control = value;
1856 }
1857
brw_set_saturate(struct brw_compile * p,unsigned value)1858 static inline void brw_set_saturate(struct brw_compile *p, unsigned value)
1859 {
1860 p->current->header.saturate = value;
1861 }
1862
brw_set_acc_write_control(struct brw_compile * p,unsigned value)1863 static inline void brw_set_acc_write_control(struct brw_compile *p, unsigned value)
1864 {
1865 if (p->gen >= 060)
1866 p->current->header.acc_wr_control = value;
1867 }
1868
/* Default-instruction-state management.  Declared here, defined outside
 * this header; see the stack/compressed_stack/current members of struct
 * brw_compile for the state they operate on.
 */
void brw_pop_insn_state(struct brw_compile *p);
void brw_push_insn_state(struct brw_compile *p);
void brw_set_compression_control(struct brw_compile *p, enum brw_compression control);
void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value );

/* Prepare \p p for emitting instructions for generation \p gen.
 * NOTE(review): \p store presumably becomes p->store - confirm in the
 * implementation.
 */
void brw_compile_init(struct brw_compile *p, int gen, void *store);

/* Encode a destination or source operand into an already allocated
 * instruction.  Used by brw_alu1()/brw_alu2() and the ROUND() emitters.
 */
void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
		  struct brw_reg dest);
void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
		  struct brw_reg reg);
void brw_set_src1(struct brw_compile *p,
		  struct brw_instruction *insn,
		  struct brw_reg reg);

/* Defined outside this header.  Takes \p src by pointer, so it may rewrite
 * the caller's register.
 */
void gen6_resolve_implied_move(struct brw_compile *p,
			       struct brw_reg *src,
			       unsigned msg_reg_nr);
1887
1888 static inline struct brw_instruction *
brw_next_insn(struct brw_compile * p,unsigned opcode)1889 brw_next_insn(struct brw_compile *p, unsigned opcode)
1890 {
1891 struct brw_instruction *insn;
1892
1893 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
1894
1895 insn = &p->store[p->nr_insn++];
1896 *insn = *p->current;
1897
1898 if (p->current->header.destreg__conditionalmod) {
1899 p->current->header.destreg__conditionalmod = 0;
1900 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1901 }
1902
1903 insn->header.opcode = opcode;
1904 return insn;
1905 }
1906
/* Helpers for regular instructions: */

/* ALU1(OP) defines brw_OP(p, dest, src0), a one-source emitter that
 * forwards to brw_alu1() with BRW_OPCODE_OP.
 */
#define ALU1(OP) \
static inline struct brw_instruction *brw_##OP(struct brw_compile *p, \
					       struct brw_reg dest, \
					       struct brw_reg src0) \
{ \
	return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
}
1915
/* ALU2(OP) defines brw_OP(p, dest, src0, src1), a two-source emitter that
 * forwards to brw_alu2() with BRW_OPCODE_OP.
 */
#define ALU2(OP) \
static inline struct brw_instruction *brw_##OP(struct brw_compile *p, \
					       struct brw_reg dest, \
					       struct brw_reg src0, \
					       struct brw_reg src1) \
{ \
	return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
}
1924
/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 *
 * ROUND(OP) defines brw_OP(p, dest, src).  It returns void because it may
 * emit two instructions: the fix-up ADD is only generated when p->gen is
 * below 060 (an octal literal).
 */
#define ROUND(OP) \
static inline void brw_##OP(struct brw_compile *p, \
			    struct brw_reg dest, \
			    struct brw_reg src) \
{ \
	struct brw_instruction *rnd, *add; \
	rnd = brw_next_insn(p, BRW_OPCODE_##OP); \
	brw_set_dest(p, rnd, dest); \
	brw_set_src0(p, rnd, src); \
	if (p->gen < 060) { \
		/* turn on round-increments */ \
		rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
		add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
		add->header.predicate_control = BRW_PREDICATE_NORMAL; \
	} \
}
1948
/**
 * Emit a one-source instruction.
 *
 * \param p      compile context
 * \param opcode opcode of the instruction to emit
 * \param dest   destination operand
 * \param src    source operand 0
 * \return the emitted instruction, for further patching by the caller
 */
static inline struct brw_instruction *brw_alu1(struct brw_compile *p,
					       unsigned opcode,
					       struct brw_reg dest,
					       struct brw_reg src)
{
	struct brw_instruction *const inst = brw_next_insn(p, opcode);

	brw_set_dest(p, inst, dest);
	brw_set_src0(p, inst, src);

	return inst;
}
1959
/**
 * Emit a two-source instruction.
 *
 * \param p      compile context
 * \param opcode opcode of the instruction to emit
 * \param dest   destination operand
 * \param src0   source operand 0
 * \param src1   source operand 1
 * \return the emitted instruction, for further patching by the caller
 */
static inline struct brw_instruction *brw_alu2(struct brw_compile *p,
					       unsigned opcode,
					       struct brw_reg dest,
					       struct brw_reg src0,
					       struct brw_reg src1)
{
	struct brw_instruction *const inst = brw_next_insn(p, opcode);

	brw_set_dest(p, inst, dest);
	brw_set_src0(p, inst, src0);
	brw_set_src1(p, inst, src1);

	return inst;
}
1972
brw_ADD(struct brw_compile * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)1973 static inline struct brw_instruction *brw_ADD(struct brw_compile *p,
1974 struct brw_reg dest,
1975 struct brw_reg src0,
1976 struct brw_reg src1)
1977 {
1978 /* 6.2.2: add */
1979 if (src0.type == BRW_REGISTER_TYPE_F ||
1980 (src0.file == BRW_IMMEDIATE_VALUE &&
1981 src0.type == BRW_REGISTER_TYPE_VF)) {
1982 assert(src1.type != BRW_REGISTER_TYPE_UD);
1983 assert(src1.type != BRW_REGISTER_TYPE_D);
1984 }
1985
1986 if (src1.type == BRW_REGISTER_TYPE_F ||
1987 (src1.file == BRW_IMMEDIATE_VALUE &&
1988 src1.type == BRW_REGISTER_TYPE_VF)) {
1989 assert(src0.type != BRW_REGISTER_TYPE_UD);
1990 assert(src0.type != BRW_REGISTER_TYPE_D);
1991 }
1992
1993 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
1994 }
1995
brw_MUL(struct brw_compile * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)1996 static inline struct brw_instruction *brw_MUL(struct brw_compile *p,
1997 struct brw_reg dest,
1998 struct brw_reg src0,
1999 struct brw_reg src1)
2000 {
2001 /* 6.32.38: mul */
2002 if (src0.type == BRW_REGISTER_TYPE_D ||
2003 src0.type == BRW_REGISTER_TYPE_UD ||
2004 src1.type == BRW_REGISTER_TYPE_D ||
2005 src1.type == BRW_REGISTER_TYPE_UD) {
2006 assert(dest.type != BRW_REGISTER_TYPE_F);
2007 }
2008
2009 if (src0.type == BRW_REGISTER_TYPE_F ||
2010 (src0.file == BRW_IMMEDIATE_VALUE &&
2011 src0.type == BRW_REGISTER_TYPE_VF)) {
2012 assert(src1.type != BRW_REGISTER_TYPE_UD);
2013 assert(src1.type != BRW_REGISTER_TYPE_D);
2014 }
2015
2016 if (src1.type == BRW_REGISTER_TYPE_F ||
2017 (src1.file == BRW_IMMEDIATE_VALUE &&
2018 src1.type == BRW_REGISTER_TYPE_VF)) {
2019 assert(src0.type != BRW_REGISTER_TYPE_UD);
2020 assert(src0.type != BRW_REGISTER_TYPE_D);
2021 }
2022
2023 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
2024 src0.nr != BRW_ARF_ACCUMULATOR);
2025 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
2026 src1.nr != BRW_ARF_ACCUMULATOR);
2027
2028 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
2029 }
2030
brw_JMPI(struct brw_compile * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)2031 static inline struct brw_instruction *brw_JMPI(struct brw_compile *p,
2032 struct brw_reg dest,
2033 struct brw_reg src0,
2034 struct brw_reg src1)
2035 {
2036 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
2037
2038 insn->header.execution_size = 1;
2039 insn->header.compression_control = BRW_COMPRESSION_NONE;
2040 insn->header.mask_control = BRW_MASK_DISABLE;
2041
2042 p->current->header.predicate_control = BRW_PREDICATE_NONE;
2043
2044 return insn;
2045 }
2046
2047
/*
 * Per-opcode instantiations through the ALU1, ALU2 and ROUND generator
 * macros, one invocation per opcode name.
 * NOTE(review): the macro definitions are outside this part of the file;
 * presumably ALU1/ALU2 produce one-/two-source brw_<OP>() helpers and
 * ROUND a rounding helper - confirm against the definitions.
 */
2048 ALU1(MOV);
2049 ALU2(SEL);
2050 ALU1(NOT);
2051 ALU2(AND);
2052 ALU2(OR);
2053 ALU2(XOR);
2054 ALU2(SHR);
2055 ALU2(SHL);
2056 ALU2(RSR);
2057 ALU2(RSL);
2058 ALU2(ASR);
2059 ALU1(FRC);
2060 ALU1(RNDD);
2061 ALU2(MAC);
2062 ALU2(MACH);
2063 ALU1(LZD);
2064 ALU2(DP4);
2065 ALU2(DPH);
2066 ALU2(DP3);
2067 ALU2(DP2);
2068 ALU2(LINE);
2069 ALU2(PLN);
2070
2071 ROUND(RNDZ);
2072 ROUND(RNDE);
2073
/* Retire the generator macros so they are not visible past this point. */
2074 #undef ALU1
2075 #undef ALU2
2076 #undef ROUND
2077
/*
 * Helpers for the SEND instruction.
 *
 * The two setters below take an already-created instruction (insn) plus
 * the individual message fields as separate arguments.
 * NOTE(review): presumably they fill in the data-port read/write message
 * descriptor of insn; the definitions are not visible here - confirm.
 */
void brw_set_dp_read_message(struct brw_compile *p,
			     struct brw_instruction *insn,
			     unsigned binding_table_index, unsigned msg_control,
			     unsigned msg_type, unsigned target_cache,
			     unsigned msg_length, unsigned response_length);

void brw_set_dp_write_message(struct brw_compile *p,
			      struct brw_instruction *insn,
			      unsigned binding_table_index, unsigned msg_control,
			      unsigned msg_type, unsigned msg_length,
			      bool header_present, bool last_render_target,
			      unsigned response_length,
			      bool end_of_thread, bool send_commit_msg);
2099
/*
 * brw_urb_WRITE() and brw_ff_sync() share their leading parameters: a
 * destination register, a message register number and one source
 * register, followed by per-message flags and lengths.
 * NOTE(review): presumably these emit URB-write and FF_SYNC messages; the
 * definitions are not visible here - confirm.
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest, unsigned msg_reg_nr,
		   struct brw_reg src0,
		   bool allocate, bool used,
		   unsigned msg_length, unsigned response_length,
		   bool eot, bool writes_complete,
		   unsigned offset, unsigned swizzle);

void brw_ff_sync(struct brw_compile *p,
		 struct brw_reg dest, unsigned msg_reg_nr,
		 struct brw_reg src0,
		 bool allocate, unsigned response_length, bool eot);
2120
/*
 * brw_fb_WRITE() has no destination register parameter; its first
 * argument after p is the dispatch width.  brw_SAMPLE() takes a
 * destination plus binding-table and sampler indices.
 * NOTE(review): presumably framebuffer-write and sampler messages; the
 * definitions are not visible here - confirm.
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width, unsigned msg_reg_nr,
		  struct brw_reg src0,
		  unsigned msg_control, unsigned binding_table_index,
		  unsigned msg_length, unsigned response_length,
		  bool eot, bool header_present);

void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest, unsigned msg_reg_nr,
		struct brw_reg src0,
		unsigned binding_table_index, unsigned sampler,
		unsigned writemask, unsigned msg_type,
		unsigned response_length, unsigned msg_length,
		bool header_present, unsigned simd_mode);
2144
/*
 * Math entry points.
 *
 * Mind the tail of brw_math(): data_type comes BEFORE precision.
 * brw_math_16() has no data_type parameter, and brw_math2() takes two
 * sources and neither saturate, msg_reg_nr, data_type nor precision.
 */
void brw_math_16(struct brw_compile *p,
		 struct brw_reg dest,
		 unsigned function, unsigned saturate, unsigned msg_reg_nr,
		 struct brw_reg src,
		 unsigned precision);

void brw_math(struct brw_compile *p,
	      struct brw_reg dest,
	      unsigned function, unsigned saturate, unsigned msg_reg_nr,
	      struct brw_reg src,
	      unsigned data_type, unsigned precision);

void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       unsigned function,
	       struct brw_reg src0, struct brw_reg src1);
2167
/*
 * Block / scattered read and write entry points.
 *
 * The oword and dword variants are handed an explicit message register
 * (mrf); the brw_dp_READ_4_vs* variants are not.
 * NOTE(review): presumably data-port messages; units of `offset` and
 * `location` are not visible here - confirm in the definitions.
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest, struct brw_reg mrf,
			  uint32_t offset, uint32_t bind_table_index);

void brw_oword_block_read_scratch(struct brw_compile *p,
				  struct brw_reg dest, struct brw_reg mrf,
				  int num_regs, unsigned offset);

void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs, unsigned offset);

void brw_dword_scattered_read(struct brw_compile *p,
			      struct brw_reg dest, struct brw_reg mrf,
			      uint32_t bind_table_index);

void brw_dp_READ_4_vs(struct brw_compile *p,
		      struct brw_reg dest,
		      unsigned location, unsigned bind_table_index);

void brw_dp_READ_4_vs_relative(struct brw_compile *p,
			       struct brw_reg dest, struct brw_reg addrReg,
			       unsigned offset, unsigned bind_table_index);
2200
/*
 * IF / ELSE / ENDIF.
 *
 * These work by manipulating the execution flags on each channel.
 * brw_IF() takes an execution size; gen6_IF() instead takes a conditional
 * and the two source registers.  Both return an instruction pointer.
 */
struct brw_instruction *brw_IF(struct brw_compile *p, unsigned execute_size);
struct brw_instruction *gen6_IF(struct brw_compile *p,
				uint32_t conditional,
				struct brw_reg src0,
				struct brw_reg src1);

void brw_ELSE(struct brw_compile *p);
void brw_ENDIF(struct brw_compile *p);
2211
/*
 * DO / WHILE loops.
 *
 * brw_WHILE() is handed a previously obtained instruction (patch_insn)
 * and gen6_CONT() the loop's DO instruction (do_insn); brw_BREAK() and
 * brw_CONT() take a pop count instead.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size);

struct brw_instruction *brw_WHILE(struct brw_compile *p,
				  struct brw_instruction *patch_insn);

struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count);
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count);
struct brw_instruction *gen6_CONT(struct brw_compile *p,
				  struct brw_instruction *do_insn);
/*
 * Forward jumps: brw_land_fwd_jump() is given the jump instruction
 * (jmp_insn) it operates on.
 */
void brw_land_fwd_jump(struct brw_compile *p, struct brw_instruction *jmp_insn);

/* Operand-less instructions. */
void brw_NOP(struct brw_compile *p);

void brw_WAIT(struct brw_compile *p);
2232
/*
 * CMP is a special case: there is never a destination, and the execution
 * size is taken from src0.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest, unsigned conditional,
	     struct brw_reg src0, struct brw_reg src1);
2241
brw_math_invert(struct brw_compile * p,struct brw_reg dst,struct brw_reg src)2242 static inline void brw_math_invert(struct brw_compile *p,
2243 struct brw_reg dst,
2244 struct brw_reg src)
2245 {
2246 brw_math(p,
2247 dst,
2248 BRW_MATH_FUNCTION_INV,
2249 BRW_MATH_SATURATE_NONE,
2250 0,
2251 src,
2252 BRW_MATH_PRECISION_FULL,
2253 BRW_MATH_DATA_VECTOR);
2254 }
2255
/*
 * Miscellaneous entry points (prototypes only).
 * NOTE(review): names suggest UIP/JIP jump-target fix-up, condition-modifier
 * swapping and disassembly to a stdio stream - confirm in the definitions.
 */
void brw_set_uip_jip(struct brw_compile *p);

uint32_t brw_swap_cmod(uint32_t cmod);

void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen);
2263
2264 #endif
2265