1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <stdarg.h>
29
30 #include "i915_context.h"
31 #include "i915_debug.h"
32 #include "i915_debug_private.h"
33 #include "i915_fpc.h"
34 #include "i915_reg.h"
35
36 #include "pipe/p_shader_tokens.h"
37 #include "tgsi/tgsi_dump.h"
38 #include "tgsi/tgsi_from_mesa.h"
39 #include "tgsi/tgsi_info.h"
40 #include "tgsi/tgsi_parse.h"
41 #include "util/log.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_string.h"
45
46 #include "draw/draw_vertex.h"
47
48 #ifndef M_PI
49 #define M_PI 3.14159265358979323846
50 #endif
51
52 /**
53 * Simple pass-through fragment shader to use when we don't have
54 * a real shader (or it fails to compile for some reason).
55 */
56 static unsigned passthrough_program[] = {
57 _3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1),
58 /* move to output color:
59 */
60 (A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL |
61 (REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)),
62 ((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) |
63 (SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) |
64 (SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) |
65 (SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)),
66 0};
67
68 /**
69 * component-wise negation of ureg
70 */
71 static inline int
negate(int reg,int x,int y,int z,int w)72 negate(int reg, int x, int y, int z, int w)
73 {
74 /* Another neat thing about the UREG representation */
75 return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
76 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
77 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
78 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
79 }
80
81 /**
82 * In the event of a translation failure, we'll generate a simple color
83 * pass-through program.
84 */
85 static void
i915_use_passthrough_shader(struct i915_fragment_shader * fs)86 i915_use_passthrough_shader(struct i915_fragment_shader *fs)
87 {
88 fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program));
89 if (fs->program) {
90 memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
91 fs->program_len = ARRAY_SIZE(passthrough_program);
92 }
93 fs->num_constants = 0;
94 }
95
96 void
i915_program_error(struct i915_fp_compile * p,const char * msg,...)97 i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
98 {
99 if (p->log_program_errors) {
100 va_list args;
101
102 va_start(args, msg);
103 mesa_loge_v(msg, args);
104 va_end(args);
105 }
106
107 p->error = 1;
108 }
109
110 static uint32_t
get_mapping(struct i915_fragment_shader * fs,enum tgsi_semantic semantic,int index)111 get_mapping(struct i915_fragment_shader *fs, enum tgsi_semantic semantic,
112 int index)
113 {
114 int i;
115 for (i = 0; i < I915_TEX_UNITS; i++) {
116 if (fs->texcoords[i].semantic == -1) {
117 fs->texcoords[i].semantic = semantic;
118 fs->texcoords[i].index = index;
119 return i;
120 }
121 if (fs->texcoords[i].semantic == semantic &&
122 fs->texcoords[i].index == index)
123 return i;
124 }
125 debug_printf("Exceeded max generics\n");
126 return 0;
127 }
128
129 /**
130 * Construct a ureg for the given source register. Will emit
131 * constants, apply swizzling and negation as needed.
132 */
133 static uint32_t
src_vector(struct i915_fp_compile * p,const struct i915_full_src_register * source,struct i915_fragment_shader * fs)134 src_vector(struct i915_fp_compile *p,
135 const struct i915_full_src_register *source,
136 struct i915_fragment_shader *fs)
137 {
138 uint32_t index = source->Register.Index;
139 uint32_t src = 0, sem_name, sem_ind;
140
141 switch (source->Register.File) {
142 case TGSI_FILE_TEMPORARY:
143 if (source->Register.Index >= I915_MAX_TEMPORARY) {
144 i915_program_error(p, "Exceeded max temporary reg");
145 return 0;
146 }
147 src = UREG(REG_TYPE_R, index);
148 break;
149 case TGSI_FILE_INPUT:
150 /* XXX: Packing COL1, FOGC into a single attribute works for
151 * texenv programs, but will fail for real fragment programs
152 * that use these attributes and expect them to be a full 4
153 * components wide. Could use a texcoord to pass these
154 * attributes if necessary, but that won't work in the general
155 * case.
156 *
157 * We also use a texture coordinate to pass wpos when possible.
158 */
159
160 sem_name = p->shader->info.input_semantic_name[index];
161 sem_ind = p->shader->info.input_semantic_index[index];
162
163 switch (sem_name) {
164 case TGSI_SEMANTIC_GENERIC:
165 case TGSI_SEMANTIC_TEXCOORD:
166 case TGSI_SEMANTIC_PCOORD:
167 case TGSI_SEMANTIC_POSITION: {
168 if (sem_name == TGSI_SEMANTIC_PCOORD)
169 fs->reads_pntc = true;
170
171 int real_tex_unit = get_mapping(fs, sem_name, sem_ind);
172 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit,
173 D0_CHANNEL_ALL);
174 break;
175 }
176 case TGSI_SEMANTIC_COLOR:
177 if (sem_ind == 0) {
178 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
179 } else {
180 /* secondary color */
181 assert(sem_ind == 1);
182 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
183 src = swizzle(src, X, Y, Z, ONE);
184 }
185 break;
186 case TGSI_SEMANTIC_FOG:
187 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
188 src = swizzle(src, W, W, W, W);
189 break;
190 case TGSI_SEMANTIC_FACE: {
191 /* for back/front faces */
192 int real_tex_unit = get_mapping(fs, sem_name, sem_ind);
193 src =
194 i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X);
195 break;
196 }
197 default:
198 i915_program_error(p, "Bad source->Index");
199 return 0;
200 }
201 break;
202
203 case TGSI_FILE_IMMEDIATE:
204 assert(index < p->num_immediates);
205 index = p->immediates_map[index];
206 FALLTHROUGH;
207 case TGSI_FILE_CONSTANT:
208 src = UREG(REG_TYPE_CONST, index);
209 break;
210
211 default:
212 i915_program_error(p, "Bad source->File");
213 return 0;
214 }
215
216 src = swizzle(src, source->Register.SwizzleX, source->Register.SwizzleY,
217 source->Register.SwizzleZ, source->Register.SwizzleW);
218
219 /* No HW abs flag, so we have to max with the negation. */
220 if (source->Register.Absolute) {
221 uint32_t tmp = i915_get_utemp(p);
222 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src,
223 negate(src, 1, 1, 1, 1), 0);
224 src = tmp;
225 }
226
227 /* There's both negate-all-components and per-component negation.
228 * Try to handle both here.
229 */
230 {
231 int n = source->Register.Negate;
232 src = negate(src, n, n, n, n);
233 }
234
235 return src;
236 }
237
238 /**
239 * Construct a ureg for a destination register.
240 */
241 static uint32_t
get_result_vector(struct i915_fp_compile * p,const struct i915_full_dst_register * dest)242 get_result_vector(struct i915_fp_compile *p,
243 const struct i915_full_dst_register *dest)
244 {
245 switch (dest->Register.File) {
246 case TGSI_FILE_OUTPUT: {
247 uint32_t sem_name =
248 p->shader->info.output_semantic_name[dest->Register.Index];
249 switch (sem_name) {
250 case TGSI_SEMANTIC_POSITION:
251 return UREG(REG_TYPE_OD, 0);
252 case TGSI_SEMANTIC_COLOR:
253 return UREG(REG_TYPE_OC, 0);
254 default:
255 i915_program_error(p, "Bad inst->DstReg.Index/semantics");
256 return 0;
257 }
258 }
259 case TGSI_FILE_TEMPORARY:
260 return UREG(REG_TYPE_R, dest->Register.Index);
261 default:
262 i915_program_error(p, "Bad inst->DstReg.File");
263 return 0;
264 }
265 }
266
267 /**
268 * Compute flags for saturation and writemask.
269 */
270 static uint32_t
get_result_flags(const struct i915_full_instruction * inst)271 get_result_flags(const struct i915_full_instruction *inst)
272 {
273 const uint32_t writeMask = inst->Dst[0].Register.WriteMask;
274 uint32_t flags = 0x0;
275
276 if (inst->Instruction.Saturate)
277 flags |= A0_DEST_SATURATE;
278
279 if (writeMask & TGSI_WRITEMASK_X)
280 flags |= A0_DEST_CHANNEL_X;
281 if (writeMask & TGSI_WRITEMASK_Y)
282 flags |= A0_DEST_CHANNEL_Y;
283 if (writeMask & TGSI_WRITEMASK_Z)
284 flags |= A0_DEST_CHANNEL_Z;
285 if (writeMask & TGSI_WRITEMASK_W)
286 flags |= A0_DEST_CHANNEL_W;
287
288 return flags;
289 }
290
291 /**
292 * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token
293 */
294 static uint32_t
translate_tex_src_target(struct i915_fp_compile * p,uint32_t tex)295 translate_tex_src_target(struct i915_fp_compile *p, uint32_t tex)
296 {
297 switch (tex) {
298 case TGSI_TEXTURE_SHADOW1D:
299 FALLTHROUGH;
300 case TGSI_TEXTURE_1D:
301 return D0_SAMPLE_TYPE_2D;
302
303 case TGSI_TEXTURE_SHADOW2D:
304 FALLTHROUGH;
305 case TGSI_TEXTURE_2D:
306 return D0_SAMPLE_TYPE_2D;
307
308 case TGSI_TEXTURE_SHADOWRECT:
309 FALLTHROUGH;
310 case TGSI_TEXTURE_RECT:
311 return D0_SAMPLE_TYPE_2D;
312
313 case TGSI_TEXTURE_3D:
314 return D0_SAMPLE_TYPE_VOLUME;
315
316 case TGSI_TEXTURE_CUBE:
317 return D0_SAMPLE_TYPE_CUBE;
318
319 default:
320 i915_program_error(p, "TexSrc type");
321 return 0;
322 }
323 }
324
325 /**
326 * Return the number of coords needed to access a given TGSI_TEXTURE_*
327 */
328 uint32_t
i915_num_coords(uint32_t tex)329 i915_num_coords(uint32_t tex)
330 {
331 switch (tex) {
332 case TGSI_TEXTURE_SHADOW1D:
333 case TGSI_TEXTURE_1D:
334 return 1;
335
336 case TGSI_TEXTURE_SHADOW2D:
337 case TGSI_TEXTURE_2D:
338 case TGSI_TEXTURE_SHADOWRECT:
339 case TGSI_TEXTURE_RECT:
340 return 2;
341
342 case TGSI_TEXTURE_3D:
343 case TGSI_TEXTURE_CUBE:
344 return 3;
345
346 default:
347 debug_printf("Unknown texture target for num coords");
348 return 2;
349 }
350 }
351
352 /**
353 * Generate texel lookup instruction.
354 */
355 static void
emit_tex(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,struct i915_fragment_shader * fs)356 emit_tex(struct i915_fp_compile *p, const struct i915_full_instruction *inst,
357 uint32_t opcode, struct i915_fragment_shader *fs)
358 {
359 uint32_t texture = inst->Texture.Texture;
360 uint32_t unit = inst->Src[1].Register.Index;
361 uint32_t tex = translate_tex_src_target(p, texture);
362 uint32_t sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
363 uint32_t coord = src_vector(p, &inst->Src[0], fs);
364
365 i915_emit_texld(p, get_result_vector(p, &inst->Dst[0]),
366 get_result_flags(inst), sampler, coord, opcode,
367 i915_num_coords(texture));
368 }
369
370 /**
371 * Generate a simple arithmetic instruction
372 * \param opcode the i915 opcode
373 * \param numArgs the number of input/src arguments
374 */
375 static void
emit_simple_arith(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,uint32_t numArgs,struct i915_fragment_shader * fs)376 emit_simple_arith(struct i915_fp_compile *p,
377 const struct i915_full_instruction *inst, uint32_t opcode,
378 uint32_t numArgs, struct i915_fragment_shader *fs)
379 {
380 uint32_t arg1, arg2, arg3;
381
382 assert(numArgs <= 3);
383
384 arg1 = (numArgs < 1) ? 0 : src_vector(p, &inst->Src[0], fs);
385 arg2 = (numArgs < 2) ? 0 : src_vector(p, &inst->Src[1], fs);
386 arg3 = (numArgs < 3) ? 0 : src_vector(p, &inst->Src[2], fs);
387
388 i915_emit_arith(p, opcode, get_result_vector(p, &inst->Dst[0]),
389 get_result_flags(inst), 0, arg1, arg2, arg3);
390 }
391
392 /** As above, but swap the first two src regs */
393 static void
emit_simple_arith_swap2(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,uint32_t numArgs,struct i915_fragment_shader * fs)394 emit_simple_arith_swap2(struct i915_fp_compile *p,
395 const struct i915_full_instruction *inst,
396 uint32_t opcode, uint32_t numArgs,
397 struct i915_fragment_shader *fs)
398 {
399 struct i915_full_instruction inst2;
400
401 assert(numArgs == 2);
402
403 /* transpose first two registers */
404 inst2 = *inst;
405 inst2.Src[0] = inst->Src[1];
406 inst2.Src[1] = inst->Src[0];
407
408 emit_simple_arith(p, &inst2, opcode, numArgs, fs);
409 }
410
411 /*
412 * Translate TGSI instruction to i915 instruction.
413 *
414 * Possible concerns:
415 *
416 * DDX, DDY -- return 0
417 * SIN, COS -- could use another taylor step?
418 * LIT -- results seem a little different to sw mesa
419 * LOG -- different to mesa on negative numbers, but this is conformant.
420 */
421 static void
i915_translate_instruction(struct i915_fp_compile * p,const struct i915_full_instruction * inst,struct i915_fragment_shader * fs)422 i915_translate_instruction(struct i915_fp_compile *p,
423 const struct i915_full_instruction *inst,
424 struct i915_fragment_shader *fs)
425 {
426 uint32_t src0, src1, src2, flags;
427 uint32_t tmp = 0;
428
429 switch (inst->Instruction.Opcode) {
430 case TGSI_OPCODE_ADD:
431 emit_simple_arith(p, inst, A0_ADD, 2, fs);
432 break;
433
434 case TGSI_OPCODE_CEIL:
435 src0 = src_vector(p, &inst->Src[0], fs);
436 tmp = i915_get_utemp(p);
437 flags = get_result_flags(inst);
438 i915_emit_arith(p, A0_FLR, tmp, flags & A0_DEST_CHANNEL_ALL, 0,
439 negate(src0, 1, 1, 1, 1), 0, 0);
440 i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]), flags, 0,
441 negate(tmp, 1, 1, 1, 1), 0, 0);
442 break;
443
444 case TGSI_OPCODE_CMP:
445 src0 = src_vector(p, &inst->Src[0], fs);
446 src1 = src_vector(p, &inst->Src[1], fs);
447 src2 = src_vector(p, &inst->Src[2], fs);
448 i915_emit_arith(p, A0_CMP, get_result_vector(p, &inst->Dst[0]),
449 get_result_flags(inst), 0, src0, src2,
450 src1); /* NOTE: order of src2, src1 */
451 break;
452
453 case TGSI_OPCODE_DDX:
454 case TGSI_OPCODE_DDY:
455 /* XXX We just output 0 here */
456 debug_printf("Punting DDX/DDY\n");
457 src0 = get_result_vector(p, &inst->Dst[0]);
458 i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]),
459 get_result_flags(inst), 0,
460 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);
461 break;
462
463 case TGSI_OPCODE_DP2:
464 src0 = src_vector(p, &inst->Src[0], fs);
465 src1 = src_vector(p, &inst->Src[1], fs);
466
467 i915_emit_arith(p, A0_DP3, get_result_vector(p, &inst->Dst[0]),
468 get_result_flags(inst), 0,
469 swizzle(src0, X, Y, ZERO, ZERO), src1, 0);
470 break;
471
472 case TGSI_OPCODE_DP3:
473 emit_simple_arith(p, inst, A0_DP3, 2, fs);
474 break;
475
476 case TGSI_OPCODE_DP4:
477 emit_simple_arith(p, inst, A0_DP4, 2, fs);
478 break;
479
480 case TGSI_OPCODE_DST:
481 src0 = src_vector(p, &inst->Src[0], fs);
482 src1 = src_vector(p, &inst->Src[1], fs);
483
484 /* result[0] = 1 * 1;
485 * result[1] = a[1] * b[1];
486 * result[2] = a[2] * 1;
487 * result[3] = 1 * b[3];
488 */
489 i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
490 get_result_flags(inst), 0, swizzle(src0, ONE, Y, Z, ONE),
491 swizzle(src1, ONE, Y, ONE, W), 0);
492 break;
493
494 case TGSI_OPCODE_END:
495 /* no-op */
496 break;
497
498 case TGSI_OPCODE_EX2:
499 src0 = src_vector(p, &inst->Src[0], fs);
500
501 i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]),
502 get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
503 0);
504 break;
505
506 case TGSI_OPCODE_FLR:
507 emit_simple_arith(p, inst, A0_FLR, 1, fs);
508 break;
509
510 case TGSI_OPCODE_FRC:
511 emit_simple_arith(p, inst, A0_FRC, 1, fs);
512 break;
513
514 case TGSI_OPCODE_KILL_IF:
515 /* kill if src[0].x < 0 || src[0].y < 0 ... */
516 src0 = src_vector(p, &inst->Src[0], fs);
517 tmp = i915_get_utemp(p);
518
519 i915_emit_texld(p, tmp, /* dest reg: a dummy reg */
520 A0_DEST_CHANNEL_ALL, /* dest writemask */
521 0, /* sampler */
522 src0, /* coord*/
523 T0_TEXKILL, /* opcode */
524 1); /* num_coord */
525 break;
526
527 case TGSI_OPCODE_KILL:
528 /* unconditional kill */
529 tmp = i915_get_utemp(p);
530
531 i915_emit_texld(p, tmp, /* dest reg: a dummy reg */
532 A0_DEST_CHANNEL_ALL, /* dest writemask */
533 0, /* sampler */
534 negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),
535 1, 1, 1, 1), /* coord */
536 T0_TEXKILL, /* opcode */
537 1); /* num_coord */
538 break;
539
540 case TGSI_OPCODE_LG2:
541 src0 = src_vector(p, &inst->Src[0], fs);
542
543 i915_emit_arith(p, A0_LOG, get_result_vector(p, &inst->Dst[0]),
544 get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
545 0);
546 break;
547
548 case TGSI_OPCODE_LIT:
549 src0 = src_vector(p, &inst->Src[0], fs);
550 tmp = i915_get_utemp(p);
551
552 /* tmp = max( a.xyzw, a.00zw )
553 * XXX: Clamp tmp.w to -128..128
554 * tmp.y = log(tmp.y)
555 * tmp.y = tmp.w * tmp.y
556 * tmp.y = exp(tmp.y)
557 * result = cmp (a.11-x1, a.1x01, a.1xy1 )
558 */
559 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
560 swizzle(src0, ZERO, ZERO, Z, W), 0);
561
562 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
563 swizzle(tmp, Y, Y, Y, Y), 0, 0);
564
565 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
566 swizzle(tmp, ZERO, Y, ZERO, ZERO),
567 swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
568
569 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
570 swizzle(tmp, Y, Y, Y, Y), 0, 0);
571
572 i915_emit_arith(
573 p, A0_CMP, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),
574 0, negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
575 swizzle(tmp, ONE, X, ZERO, ONE), swizzle(tmp, ONE, X, Y, ONE));
576
577 break;
578
579 case TGSI_OPCODE_LRP:
580 src0 = src_vector(p, &inst->Src[0], fs);
581 src1 = src_vector(p, &inst->Src[1], fs);
582 src2 = src_vector(p, &inst->Src[2], fs);
583 flags = get_result_flags(inst);
584 tmp = i915_get_utemp(p);
585
586 /* b*a + c*(1-a)
587 *
588 * b*a + c - ca
589 *
590 * tmp = b*a + c,
591 * result = (-c)*a + tmp
592 */
593 i915_emit_arith(p, A0_MAD, tmp, flags & A0_DEST_CHANNEL_ALL, 0, src1,
594 src0, src2);
595
596 i915_emit_arith(p, A0_MAD, get_result_vector(p, &inst->Dst[0]), flags, 0,
597 negate(src2, 1, 1, 1, 1), src0, tmp);
598 break;
599
600 case TGSI_OPCODE_MAD:
601 emit_simple_arith(p, inst, A0_MAD, 3, fs);
602 break;
603
604 case TGSI_OPCODE_MAX:
605 emit_simple_arith(p, inst, A0_MAX, 2, fs);
606 break;
607
608 case TGSI_OPCODE_MIN:
609 emit_simple_arith(p, inst, A0_MIN, 2, fs);
610 break;
611
612 case TGSI_OPCODE_MOV:
613 emit_simple_arith(p, inst, A0_MOV, 1, fs);
614 break;
615
616 case TGSI_OPCODE_MUL:
617 emit_simple_arith(p, inst, A0_MUL, 2, fs);
618 break;
619
620 case TGSI_OPCODE_NOP:
621 break;
622
623 case TGSI_OPCODE_POW:
624 src0 = src_vector(p, &inst->Src[0], fs);
625 src1 = src_vector(p, &inst->Src[1], fs);
626 tmp = i915_get_utemp(p);
627 flags = get_result_flags(inst);
628
629 /* XXX: masking on intermediate values, here and elsewhere.
630 */
631 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_X, 0,
632 swizzle(src0, X, X, X, X), 0, 0);
633
634 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
635
636 i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]), flags, 0,
637 swizzle(tmp, X, X, X, X), 0, 0);
638 break;
639
640 case TGSI_OPCODE_RET:
641 /* XXX: no-op? */
642 break;
643
644 case TGSI_OPCODE_RCP:
645 src0 = src_vector(p, &inst->Src[0], fs);
646
647 i915_emit_arith(p, A0_RCP, get_result_vector(p, &inst->Dst[0]),
648 get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
649 0);
650 break;
651
652 case TGSI_OPCODE_RSQ:
653 src0 = src_vector(p, &inst->Src[0], fs);
654
655 i915_emit_arith(p, A0_RSQ, get_result_vector(p, &inst->Dst[0]),
656 get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
657 0);
658 break;
659
660 case TGSI_OPCODE_SEQ:
661 /* if we're both >= and <= then we're == */
662 src0 = src_vector(p, &inst->Src[0], fs);
663 src1 = src_vector(p, &inst->Src[1], fs);
664 tmp = i915_get_utemp(p);
665
666 i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);
667
668 i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
669 get_result_flags(inst), 0, src1, src0, 0);
670
671 i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
672 get_result_flags(inst), 0,
673 get_result_vector(p, &inst->Dst[0]), tmp, 0);
674
675 break;
676
677 case TGSI_OPCODE_SGE:
678 emit_simple_arith(p, inst, A0_SGE, 2, fs);
679 break;
680
681 case TGSI_OPCODE_SLE:
682 /* like SGE, but swap reg0, reg1 */
683 emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);
684 break;
685
686 case TGSI_OPCODE_SLT:
687 emit_simple_arith(p, inst, A0_SLT, 2, fs);
688 break;
689
690 case TGSI_OPCODE_SGT:
691 /* like SLT, but swap reg0, reg1 */
692 emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
693 break;
694
695 case TGSI_OPCODE_SNE:
696 /* if we're < or > then we're != */
697 src0 = src_vector(p, &inst->Src[0], fs);
698 src1 = src_vector(p, &inst->Src[1], fs);
699 tmp = i915_get_utemp(p);
700
701 i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);
702
703 i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
704 get_result_flags(inst), 0, src1, src0, 0);
705
706 i915_emit_arith(p, A0_ADD, get_result_vector(p, &inst->Dst[0]),
707 get_result_flags(inst), 0,
708 get_result_vector(p, &inst->Dst[0]), tmp, 0);
709 break;
710
711 case TGSI_OPCODE_SSG:
712 /* compute (src>0) - (src<0) */
713 src0 = src_vector(p, &inst->Src[0], fs);
714 tmp = i915_get_utemp(p);
715
716 i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
717 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0);
718
719 i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
720 get_result_flags(inst), 0,
721 swizzle(src0, ZERO, ZERO, ZERO, ZERO), src0, 0);
722
723 i915_emit_arith(
724 p, A0_ADD, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),
725 0, get_result_vector(p, &inst->Dst[0]), negate(tmp, 1, 1, 1, 1), 0);
726 break;
727
728 case TGSI_OPCODE_TEX:
729 emit_tex(p, inst, T0_TEXLD, fs);
730 break;
731
732 case TGSI_OPCODE_TRUNC:
733 emit_simple_arith(p, inst, A0_TRC, 1, fs);
734 break;
735
736 case TGSI_OPCODE_TXB:
737 emit_tex(p, inst, T0_TEXLDB, fs);
738 break;
739
740 case TGSI_OPCODE_TXP:
741 emit_tex(p, inst, T0_TEXLDP, fs);
742 break;
743
744 default:
745 i915_program_error(p, "bad opcode %s (%d)",
746 tgsi_get_opcode_name(inst->Instruction.Opcode),
747 inst->Instruction.Opcode);
748 return;
749 }
750
751 i915_release_utemps(p);
752 }
753
754 static void
i915_translate_token(struct i915_fp_compile * p,const union i915_full_token * token,struct i915_fragment_shader * fs)755 i915_translate_token(struct i915_fp_compile *p,
756 const union i915_full_token *token,
757 struct i915_fragment_shader *fs)
758 {
759 struct i915_fragment_shader *ifs = p->shader;
760 switch (token->Token.Type) {
761 case TGSI_TOKEN_TYPE_PROPERTY:
762 /* Ignore properties where we only support one value. */
763 assert(token->FullProperty.Property.PropertyName ==
764 TGSI_PROPERTY_FS_COORD_ORIGIN ||
765 token->FullProperty.Property.PropertyName ==
766 TGSI_PROPERTY_FS_COORD_PIXEL_CENTER ||
767 token->FullProperty.Property.PropertyName ==
768 TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);
769 break;
770
771 case TGSI_TOKEN_TYPE_DECLARATION:
772 if (token->FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) {
773 if (token->FullDeclaration.Range.Last >= I915_MAX_CONSTANT) {
774 i915_program_error(p, "Exceeded %d max uniforms",
775 I915_MAX_CONSTANT);
776 } else {
777 uint32_t i;
778 for (i = token->FullDeclaration.Range.First;
779 i <= token->FullDeclaration.Range.Last; i++) {
780 ifs->constant_flags[i] = I915_CONSTFLAG_USER;
781 ifs->num_constants = MAX2(ifs->num_constants, i + 1);
782 }
783 }
784 } else if (token->FullDeclaration.Declaration.File ==
785 TGSI_FILE_TEMPORARY) {
786 if (token->FullDeclaration.Range.Last >= I915_MAX_TEMPORARY) {
787 i915_program_error(p, "Exceeded %d max TGSI temps",
788 I915_MAX_TEMPORARY);
789 } else {
790 uint32_t i;
791 for (i = token->FullDeclaration.Range.First;
792 i <= token->FullDeclaration.Range.Last; i++) {
793 /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
794 p->temp_flag |= (1 << i); /* mark temp as used */
795 }
796 }
797 }
798 break;
799
800 case TGSI_TOKEN_TYPE_IMMEDIATE: {
801 const struct tgsi_full_immediate *imm = &token->FullImmediate;
802 const uint32_t pos = p->num_immediates++;
803 uint32_t j;
804 assert(imm->Immediate.NrTokens <= 4 + 1);
805 for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
806 p->immediates[pos][j] = imm->u[j].Float;
807 }
808 } break;
809
810 case TGSI_TOKEN_TYPE_INSTRUCTION:
811 if (p->first_instruction) {
812 /* resolve location of immediates */
813 uint32_t i, j;
814 for (i = 0; i < p->num_immediates; i++) {
815 /* find constant slot for this immediate */
816 for (j = 0; j < I915_MAX_CONSTANT; j++) {
817 if (ifs->constant_flags[j] == 0x0) {
818 memcpy(ifs->constants[j], p->immediates[i],
819 4 * sizeof(float));
820 /*printf("immediate %d maps to const %d\n", i, j);*/
821 ifs->constant_flags[j] = 0xf; /* all four comps used */
822 p->immediates_map[i] = j;
823 ifs->num_constants = MAX2(ifs->num_constants, j + 1);
824 break;
825 }
826 }
827 if (j == I915_MAX_CONSTANT) {
828 i915_program_error(p, "Exceeded %d max uniforms and immediates.",
829 I915_MAX_CONSTANT);
830 }
831 }
832
833 p->first_instruction = false;
834 }
835
836 i915_translate_instruction(p, &token->FullInstruction, fs);
837 break;
838
839 default:
840 assert(0);
841 }
842 }
843
844 /**
845 * Translate TGSI fragment shader into i915 hardware instructions.
846 * \param p the translation state
847 * \param tokens the TGSI token array
848 */
849 static void
i915_translate_instructions(struct i915_fp_compile * p,const struct i915_token_list * tokens,struct i915_fragment_shader * fs)850 i915_translate_instructions(struct i915_fp_compile *p,
851 const struct i915_token_list *tokens,
852 struct i915_fragment_shader *fs)
853 {
854 int i;
855 for (i = 0; i < tokens->NumTokens && !p->error; i++) {
856 i915_translate_token(p, &tokens->Tokens[i], fs);
857 }
858 }
859
860 static struct i915_fp_compile *
i915_init_compile(struct i915_context * i915,struct i915_fragment_shader * ifs)861 i915_init_compile(struct i915_context *i915, struct i915_fragment_shader *ifs)
862 {
863 struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
864 int i;
865
866 p->shader = ifs;
867
868 /* Put new constants at end of const buffer, growing downward.
869 * The problem is we don't know how many user-defined constants might
870 * be specified with pipe->set_constant_buffer().
871 * Should pre-scan the user's program to determine the highest-numbered
872 * constant referenced.
873 */
874 ifs->num_constants = 0;
875 memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
876
877 memset(&p->register_phases, 0, sizeof(p->register_phases));
878
879 for (i = 0; i < I915_TEX_UNITS; i++)
880 ifs->texcoords[i].semantic = -1;
881
882 p->log_program_errors = !i915->no_log_program_errors;
883
884 p->first_instruction = true;
885
886 p->nr_tex_indirect = 1; /* correct? */
887 p->nr_tex_insn = 0;
888 p->nr_alu_insn = 0;
889 p->nr_decl_insn = 0;
890
891 p->csr = p->program;
892 p->decl = p->declarations;
893 p->decl_s = 0;
894 p->decl_t = 0;
895 p->temp_flag = ~0x0 << I915_MAX_TEMPORARY;
896 p->utemp_flag = ~0x7;
897
898 /* initialize the first program word */
899 *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
900
901 return p;
902 }
903
904 /* Copy compile results to the fragment program struct and destroy the
905 * compilation context.
906 */
907 static void
i915_fini_compile(struct i915_context * i915,struct i915_fp_compile * p)908 i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
909 {
910 struct i915_fragment_shader *ifs = p->shader;
911 unsigned long program_size = (unsigned long)(p->csr - p->program);
912 unsigned long decl_size = (unsigned long)(p->decl - p->declarations);
913
914 if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
915 debug_printf("Exceeded max nr indirect texture lookups\n");
916
917 if (p->nr_tex_insn > I915_MAX_TEX_INSN)
918 i915_program_error(p, "Exceeded max TEX instructions");
919
920 if (p->nr_alu_insn > I915_MAX_ALU_INSN)
921 i915_program_error(p, "Exceeded max ALU instructions");
922
923 if (p->nr_decl_insn > I915_MAX_DECL_INSN)
924 i915_program_error(p, "Exceeded max DECL instructions");
925
926 /* hw doesn't seem to like empty frag programs (num_instructions == 1 is just
927 * TGSI_END), even when the depth write fixup gets emitted below - maybe that
928 * one is fishy, too?
929 */
930 if (ifs->info.num_instructions == 1)
931 i915_program_error(p, "Empty fragment shader");
932
933 if (p->error) {
934 p->NumNativeInstructions = 0;
935 p->NumNativeAluInstructions = 0;
936 p->NumNativeTexInstructions = 0;
937 p->NumNativeTexIndirections = 0;
938
939 i915_use_passthrough_shader(ifs);
940 } else {
941 p->NumNativeInstructions =
942 p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn;
943 p->NumNativeAluInstructions = p->nr_alu_insn;
944 p->NumNativeTexInstructions = p->nr_tex_insn;
945 p->NumNativeTexIndirections = p->nr_tex_indirect;
946
947 /* patch in the program length */
948 p->declarations[0] |= program_size + decl_size - 2;
949
950 /* Copy compilation results to fragment program struct:
951 */
952 assert(!ifs->program);
953
954 ifs->program_len = decl_size + program_size;
955 ifs->program = (uint32_t *)MALLOC(ifs->program_len * sizeof(uint32_t));
956 memcpy(ifs->program, p->declarations, decl_size * sizeof(uint32_t));
957 memcpy(&ifs->program[decl_size], p->program,
958 program_size * sizeof(uint32_t));
959
960 pipe_debug_message(
961 &i915->debug, SHADER_INFO,
962 "%s shader: %d inst, %d tex, %d tex_indirect, %d const",
963 _mesa_shader_stage_to_abbrev(MESA_SHADER_FRAGMENT), (int)program_size,
964 p->nr_tex_insn, p->nr_tex_indirect, ifs->num_constants);
965 }
966
967 /* Release the compilation struct:
968 */
969 FREE(p);
970 }
971
972 /**
973 * Rather than trying to intercept and jiggle depth writes during
974 * emit, just move the value into its correct position at the end of
975 * the program:
976 */
977 static void
i915_fixup_depth_write(struct i915_fp_compile * p)978 i915_fixup_depth_write(struct i915_fp_compile *p)
979 {
980 for (int i = 0; i < p->shader->info.num_outputs; i++) {
981 if (p->shader->info.output_semantic_name[i] != TGSI_SEMANTIC_POSITION)
982 continue;
983
984 const uint32_t depth = UREG(REG_TYPE_OD, 0);
985
986 i915_emit_arith(p, A0_MOV, /* opcode */
987 depth, /* dest reg */
988 A0_DEST_CHANNEL_W, /* write mask */
989 0, /* saturate? */
990 swizzle(depth, X, Y, Z, Z), /* src0 */
991 0, 0 /* src1, src2 */);
992 }
993 }
994
995 void
i915_translate_fragment_program(struct i915_context * i915,struct i915_fragment_shader * fs)996 i915_translate_fragment_program(struct i915_context *i915,
997 struct i915_fragment_shader *fs)
998 {
999 struct i915_fp_compile *p;
1000 const struct tgsi_token *tokens = fs->state.tokens;
1001 struct i915_token_list *i_tokens;
1002
1003 if (I915_DBG_ON(DBG_FS)) {
1004 mesa_logi("TGSI fragment shader:");
1005 tgsi_dump(tokens, 0);
1006 }
1007
1008 p = i915_init_compile(i915, fs);
1009
1010 i_tokens = i915_optimize(tokens);
1011 i915_translate_instructions(p, i_tokens, fs);
1012 i915_fixup_depth_write(p);
1013
1014 i915_fini_compile(i915, p);
1015 i915_optimize_free(i_tokens);
1016
1017 if (I915_DBG_ON(DBG_FS)) {
1018 mesa_logi("i915 fragment shader with %d constants%s", fs->num_constants,
1019 fs->num_constants ? ":" : "");
1020
1021 for (int i = 0; i < I915_MAX_CONSTANT; i++) {
1022 if (fs->constant_flags[i] &&
1023 fs->constant_flags[i] != I915_CONSTFLAG_USER) {
1024 mesa_logi("\t\tC[%d] = { %f, %f, %f, %f }", i, fs->constants[i][0],
1025 fs->constants[i][1], fs->constants[i][2],
1026 fs->constants[i][3]);
1027 }
1028 }
1029 i915_disassemble_program(fs->program, fs->program_len);
1030 }
1031 }
1032