1 /**************************************************************************
2  *
3  * Copyright 2007 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include <stdarg.h>
29 
30 #include "i915_context.h"
31 #include "i915_debug.h"
32 #include "i915_debug_private.h"
33 #include "i915_fpc.h"
34 #include "i915_reg.h"
35 
36 #include "pipe/p_shader_tokens.h"
37 #include "tgsi/tgsi_dump.h"
38 #include "tgsi/tgsi_from_mesa.h"
39 #include "tgsi/tgsi_info.h"
40 #include "tgsi/tgsi_parse.h"
41 #include "util/log.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_string.h"
45 
46 #include "draw/draw_vertex.h"
47 
/* Fallback definition of M_PI: it is only provided here when none of the
 * headers included above has already defined the macro.
 */
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
51 
52 /**
53  * Simple pass-through fragment shader to use when we don't have
54  * a real shader (or it fails to compile for some reason).
55  */
56 static unsigned passthrough_program[] = {
57    _3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1),
58    /* move to output color:
59     */
60    (A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL |
61     (REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)),
62    ((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) |
63     (SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) |
64     (SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) |
65     (SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)),
66    0};
67 
68 /**
69  * component-wise negation of ureg
70  */
71 static inline int
negate(int reg,int x,int y,int z,int w)72 negate(int reg, int x, int y, int z, int w)
73 {
74    /* Another neat thing about the UREG representation */
75    return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
76                  ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
77                  ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
78                  ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
79 }
80 
81 /**
82  * In the event of a translation failure, we'll generate a simple color
83  * pass-through program.
84  */
85 static void
i915_use_passthrough_shader(struct i915_fragment_shader * fs)86 i915_use_passthrough_shader(struct i915_fragment_shader *fs)
87 {
88    fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program));
89    if (fs->program) {
90       memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
91       fs->program_len = ARRAY_SIZE(passthrough_program);
92    }
93    fs->num_constants = 0;
94 }
95 
96 void
i915_program_error(struct i915_fp_compile * p,const char * msg,...)97 i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
98 {
99    if (p->log_program_errors) {
100       va_list args;
101 
102       va_start(args, msg);
103       mesa_loge_v(msg, args);
104       va_end(args);
105    }
106 
107    p->error = 1;
108 }
109 
110 static uint32_t
get_mapping(struct i915_fragment_shader * fs,enum tgsi_semantic semantic,int index)111 get_mapping(struct i915_fragment_shader *fs, enum tgsi_semantic semantic,
112             int index)
113 {
114    int i;
115    for (i = 0; i < I915_TEX_UNITS; i++) {
116       if (fs->texcoords[i].semantic == -1) {
117          fs->texcoords[i].semantic = semantic;
118          fs->texcoords[i].index = index;
119          return i;
120       }
121       if (fs->texcoords[i].semantic == semantic &&
122           fs->texcoords[i].index == index)
123          return i;
124    }
125    debug_printf("Exceeded max generics\n");
126    return 0;
127 }
128 
129 /**
130  * Construct a ureg for the given source register.  Will emit
131  * constants, apply swizzling and negation as needed.
132  */
133 static uint32_t
src_vector(struct i915_fp_compile * p,const struct i915_full_src_register * source,struct i915_fragment_shader * fs)134 src_vector(struct i915_fp_compile *p,
135            const struct i915_full_src_register *source,
136            struct i915_fragment_shader *fs)
137 {
138    uint32_t index = source->Register.Index;
139    uint32_t src = 0, sem_name, sem_ind;
140 
141    switch (source->Register.File) {
142    case TGSI_FILE_TEMPORARY:
143       if (source->Register.Index >= I915_MAX_TEMPORARY) {
144          i915_program_error(p, "Exceeded max temporary reg");
145          return 0;
146       }
147       src = UREG(REG_TYPE_R, index);
148       break;
149    case TGSI_FILE_INPUT:
150       /* XXX: Packing COL1, FOGC into a single attribute works for
151        * texenv programs, but will fail for real fragment programs
152        * that use these attributes and expect them to be a full 4
153        * components wide.  Could use a texcoord to pass these
154        * attributes if necessary, but that won't work in the general
155        * case.
156        *
157        * We also use a texture coordinate to pass wpos when possible.
158        */
159 
160       sem_name = p->shader->info.input_semantic_name[index];
161       sem_ind = p->shader->info.input_semantic_index[index];
162 
163       switch (sem_name) {
164       case TGSI_SEMANTIC_GENERIC:
165       case TGSI_SEMANTIC_TEXCOORD:
166       case TGSI_SEMANTIC_PCOORD:
167       case TGSI_SEMANTIC_POSITION: {
168          if (sem_name == TGSI_SEMANTIC_PCOORD)
169             fs->reads_pntc = true;
170 
171          int real_tex_unit = get_mapping(fs, sem_name, sem_ind);
172          src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit,
173                               D0_CHANNEL_ALL);
174          break;
175       }
176       case TGSI_SEMANTIC_COLOR:
177          if (sem_ind == 0) {
178             src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
179          } else {
180             /* secondary color */
181             assert(sem_ind == 1);
182             src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
183             src = swizzle(src, X, Y, Z, ONE);
184          }
185          break;
186       case TGSI_SEMANTIC_FOG:
187          src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
188          src = swizzle(src, W, W, W, W);
189          break;
190       case TGSI_SEMANTIC_FACE: {
191          /* for back/front faces */
192          int real_tex_unit = get_mapping(fs, sem_name, sem_ind);
193          src =
194             i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X);
195          break;
196       }
197       default:
198          i915_program_error(p, "Bad source->Index");
199          return 0;
200       }
201       break;
202 
203    case TGSI_FILE_IMMEDIATE:
204       assert(index < p->num_immediates);
205       index = p->immediates_map[index];
206       FALLTHROUGH;
207    case TGSI_FILE_CONSTANT:
208       src = UREG(REG_TYPE_CONST, index);
209       break;
210 
211    default:
212       i915_program_error(p, "Bad source->File");
213       return 0;
214    }
215 
216    src = swizzle(src, source->Register.SwizzleX, source->Register.SwizzleY,
217                  source->Register.SwizzleZ, source->Register.SwizzleW);
218 
219    /* No HW abs flag, so we have to max with the negation. */
220    if (source->Register.Absolute) {
221       uint32_t tmp = i915_get_utemp(p);
222       i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src,
223                       negate(src, 1, 1, 1, 1), 0);
224       src = tmp;
225    }
226 
227    /* There's both negate-all-components and per-component negation.
228     * Try to handle both here.
229     */
230    {
231       int n = source->Register.Negate;
232       src = negate(src, n, n, n, n);
233    }
234 
235    return src;
236 }
237 
238 /**
239  * Construct a ureg for a destination register.
240  */
241 static uint32_t
get_result_vector(struct i915_fp_compile * p,const struct i915_full_dst_register * dest)242 get_result_vector(struct i915_fp_compile *p,
243                   const struct i915_full_dst_register *dest)
244 {
245    switch (dest->Register.File) {
246    case TGSI_FILE_OUTPUT: {
247       uint32_t sem_name =
248          p->shader->info.output_semantic_name[dest->Register.Index];
249       switch (sem_name) {
250       case TGSI_SEMANTIC_POSITION:
251          return UREG(REG_TYPE_OD, 0);
252       case TGSI_SEMANTIC_COLOR:
253          return UREG(REG_TYPE_OC, 0);
254       default:
255          i915_program_error(p, "Bad inst->DstReg.Index/semantics");
256          return 0;
257       }
258    }
259    case TGSI_FILE_TEMPORARY:
260       return UREG(REG_TYPE_R, dest->Register.Index);
261    default:
262       i915_program_error(p, "Bad inst->DstReg.File");
263       return 0;
264    }
265 }
266 
267 /**
268  * Compute flags for saturation and writemask.
269  */
270 static uint32_t
get_result_flags(const struct i915_full_instruction * inst)271 get_result_flags(const struct i915_full_instruction *inst)
272 {
273    const uint32_t writeMask = inst->Dst[0].Register.WriteMask;
274    uint32_t flags = 0x0;
275 
276    if (inst->Instruction.Saturate)
277       flags |= A0_DEST_SATURATE;
278 
279    if (writeMask & TGSI_WRITEMASK_X)
280       flags |= A0_DEST_CHANNEL_X;
281    if (writeMask & TGSI_WRITEMASK_Y)
282       flags |= A0_DEST_CHANNEL_Y;
283    if (writeMask & TGSI_WRITEMASK_Z)
284       flags |= A0_DEST_CHANNEL_Z;
285    if (writeMask & TGSI_WRITEMASK_W)
286       flags |= A0_DEST_CHANNEL_W;
287 
288    return flags;
289 }
290 
291 /**
292  * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token
293  */
294 static uint32_t
translate_tex_src_target(struct i915_fp_compile * p,uint32_t tex)295 translate_tex_src_target(struct i915_fp_compile *p, uint32_t tex)
296 {
297    switch (tex) {
298    case TGSI_TEXTURE_SHADOW1D:
299       FALLTHROUGH;
300    case TGSI_TEXTURE_1D:
301       return D0_SAMPLE_TYPE_2D;
302 
303    case TGSI_TEXTURE_SHADOW2D:
304       FALLTHROUGH;
305    case TGSI_TEXTURE_2D:
306       return D0_SAMPLE_TYPE_2D;
307 
308    case TGSI_TEXTURE_SHADOWRECT:
309       FALLTHROUGH;
310    case TGSI_TEXTURE_RECT:
311       return D0_SAMPLE_TYPE_2D;
312 
313    case TGSI_TEXTURE_3D:
314       return D0_SAMPLE_TYPE_VOLUME;
315 
316    case TGSI_TEXTURE_CUBE:
317       return D0_SAMPLE_TYPE_CUBE;
318 
319    default:
320       i915_program_error(p, "TexSrc type");
321       return 0;
322    }
323 }
324 
325 /**
326  * Return the number of coords needed to access a given TGSI_TEXTURE_*
327  */
328 uint32_t
i915_num_coords(uint32_t tex)329 i915_num_coords(uint32_t tex)
330 {
331    switch (tex) {
332    case TGSI_TEXTURE_SHADOW1D:
333    case TGSI_TEXTURE_1D:
334       return 1;
335 
336    case TGSI_TEXTURE_SHADOW2D:
337    case TGSI_TEXTURE_2D:
338    case TGSI_TEXTURE_SHADOWRECT:
339    case TGSI_TEXTURE_RECT:
340       return 2;
341 
342    case TGSI_TEXTURE_3D:
343    case TGSI_TEXTURE_CUBE:
344       return 3;
345 
346    default:
347       debug_printf("Unknown texture target for num coords");
348       return 2;
349    }
350 }
351 
352 /**
353  * Generate texel lookup instruction.
354  */
355 static void
emit_tex(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,struct i915_fragment_shader * fs)356 emit_tex(struct i915_fp_compile *p, const struct i915_full_instruction *inst,
357          uint32_t opcode, struct i915_fragment_shader *fs)
358 {
359    uint32_t texture = inst->Texture.Texture;
360    uint32_t unit = inst->Src[1].Register.Index;
361    uint32_t tex = translate_tex_src_target(p, texture);
362    uint32_t sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
363    uint32_t coord = src_vector(p, &inst->Src[0], fs);
364 
365    i915_emit_texld(p, get_result_vector(p, &inst->Dst[0]),
366                    get_result_flags(inst), sampler, coord, opcode,
367                    i915_num_coords(texture));
368 }
369 
370 /**
371  * Generate a simple arithmetic instruction
372  * \param opcode  the i915 opcode
373  * \param numArgs  the number of input/src arguments
374  */
375 static void
emit_simple_arith(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,uint32_t numArgs,struct i915_fragment_shader * fs)376 emit_simple_arith(struct i915_fp_compile *p,
377                   const struct i915_full_instruction *inst, uint32_t opcode,
378                   uint32_t numArgs, struct i915_fragment_shader *fs)
379 {
380    uint32_t arg1, arg2, arg3;
381 
382    assert(numArgs <= 3);
383 
384    arg1 = (numArgs < 1) ? 0 : src_vector(p, &inst->Src[0], fs);
385    arg2 = (numArgs < 2) ? 0 : src_vector(p, &inst->Src[1], fs);
386    arg3 = (numArgs < 3) ? 0 : src_vector(p, &inst->Src[2], fs);
387 
388    i915_emit_arith(p, opcode, get_result_vector(p, &inst->Dst[0]),
389                    get_result_flags(inst), 0, arg1, arg2, arg3);
390 }
391 
392 /** As above, but swap the first two src regs */
393 static void
emit_simple_arith_swap2(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,uint32_t numArgs,struct i915_fragment_shader * fs)394 emit_simple_arith_swap2(struct i915_fp_compile *p,
395                         const struct i915_full_instruction *inst,
396                         uint32_t opcode, uint32_t numArgs,
397                         struct i915_fragment_shader *fs)
398 {
399    struct i915_full_instruction inst2;
400 
401    assert(numArgs == 2);
402 
403    /* transpose first two registers */
404    inst2 = *inst;
405    inst2.Src[0] = inst->Src[1];
406    inst2.Src[1] = inst->Src[0];
407 
408    emit_simple_arith(p, &inst2, opcode, numArgs, fs);
409 }
410 
/*
 * Translate TGSI instruction to i915 instruction.
 *
 * Each supported opcode is expanded into one or more i915 arithmetic or
 * texture instructions.  Utility temporaries obtained while expanding an
 * instruction are released at the end of the function.  An unsupported
 * opcode is reported through i915_program_error() and returns early,
 * without releasing them.
 *
 * Possible concerns:
 *
 * DDX, DDY -- return 0
 * SIN, COS -- could use another taylor step?
 *             (no SIN/COS case is present in the switch below)
 * LIT      -- results seem a little different to sw mesa
 * LOG      -- different to mesa on negative numbers, but this is conformant.
 */
static void
i915_translate_instruction(struct i915_fp_compile *p,
                           const struct i915_full_instruction *inst,
                           struct i915_fragment_shader *fs)
{
   uint32_t src0, src1, src2, flags;
   uint32_t tmp = 0;

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_ADD:
      emit_simple_arith(p, inst, A0_ADD, 2, fs);
      break;

   case TGSI_OPCODE_CEIL:
      /* ceil(x) computed as -floor(-x) */
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);
      flags = get_result_flags(inst);
      /* the intermediate keeps the write mask but drops saturation */
      i915_emit_arith(p, A0_FLR, tmp, flags & A0_DEST_CHANNEL_ALL, 0,
                      negate(src0, 1, 1, 1, 1), 0, 0);
      i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]), flags, 0,
                      negate(tmp, 1, 1, 1, 1), 0, 0);
      break;

   case TGSI_OPCODE_CMP:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      src2 = src_vector(p, &inst->Src[2], fs);
      i915_emit_arith(p, A0_CMP, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, src0, src2,
                      src1); /* NOTE: order of src2, src1 */
      break;

   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
      /* XXX We just output 0 here */
      debug_printf("Punting DDX/DDY\n");
      /* the destination ureg is reused as the source, with every
       * channel swizzled to ZERO
       */
      src0 = get_result_vector(p, &inst->Dst[0]);
      i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);
      break;

   case TGSI_OPCODE_DP2:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);

      /* DP3 with the z (and w) channel of src0 forced to zero */
      i915_emit_arith(p, A0_DP3, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, X, Y, ZERO, ZERO), src1, 0);
      break;

   case TGSI_OPCODE_DP3:
      emit_simple_arith(p, inst, A0_DP3, 2, fs);
      break;

   case TGSI_OPCODE_DP4:
      emit_simple_arith(p, inst, A0_DP4, 2, fs);
      break;

   case TGSI_OPCODE_DST:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);

      /* result[0] = 1    * 1;
       * result[1] = a[1] * b[1];
       * result[2] = a[2] * 1;
       * result[3] = 1    * b[3];
       */
      i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, ONE, Y, Z, ONE),
                      swizzle(src1, ONE, Y, ONE, W), 0);
      break;

   case TGSI_OPCODE_END:
      /* no-op */
      break;

   case TGSI_OPCODE_EX2:
      src0 = src_vector(p, &inst->Src[0], fs);

      /* scalar op: the x channel of the source is replicated */
      i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_FLR:
      emit_simple_arith(p, inst, A0_FLR, 1, fs);
      break;

   case TGSI_OPCODE_FRC:
      emit_simple_arith(p, inst, A0_FRC, 1, fs);
      break;

   case TGSI_OPCODE_KILL_IF:
      /* kill if src[0].x < 0 || src[0].y < 0 ... */
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      i915_emit_texld(p, tmp,              /* dest reg: a dummy reg */
                      A0_DEST_CHANNEL_ALL, /* dest writemask */
                      0,                   /* sampler */
                      src0,                /* coord*/
                      T0_TEXKILL,          /* opcode */
                      1);                  /* num_coord */
      break;

   case TGSI_OPCODE_KILL:
      /* unconditional kill: same TEXKILL as KILL_IF, but with a
       * coordinate whose channels are all negated ONE
       */
      tmp = i915_get_utemp(p);

      i915_emit_texld(p, tmp,              /* dest reg: a dummy reg */
                      A0_DEST_CHANNEL_ALL, /* dest writemask */
                      0,                   /* sampler */
                      negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),
                             1, 1, 1, 1), /* coord */
                      T0_TEXKILL,         /* opcode */
                      1);                 /* num_coord */
      break;

   case TGSI_OPCODE_LG2:
      src0 = src_vector(p, &inst->Src[0], fs);

      /* scalar op: the x channel of the source is replicated */
      i915_emit_arith(p, A0_LOG, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_LIT:
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      /* tmp = max( a.xyzw, a.00zw )
       * XXX: Clamp tmp.w to -128..128
       * tmp.y = log(tmp.y)
       * tmp.y = tmp.w * tmp.y
       * tmp.y = exp(tmp.y)
       * result = cmp (a.11-x1, a.1x01, a.1xy1 )
       */
      i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
                      swizzle(src0, ZERO, ZERO, Z, W), 0);

      i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, Y, Y, Y, Y), 0, 0);

      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, ZERO, Y, ZERO, ZERO),
                      swizzle(tmp, ZERO, W, ZERO, ZERO), 0);

      i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, Y, Y, Y, Y), 0, 0);

      i915_emit_arith(
         p, A0_CMP, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),
         0, negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
         swizzle(tmp, ONE, X, ZERO, ONE), swizzle(tmp, ONE, X, Y, ONE));

      break;

   case TGSI_OPCODE_LRP:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      src2 = src_vector(p, &inst->Src[2], fs);
      flags = get_result_flags(inst);
      tmp = i915_get_utemp(p);

      /* b*a + c*(1-a)
       *
       * b*a + c - ca
       *
       * tmp = b*a + c,
       * result = (-c)*a + tmp
       */
      /* the intermediate keeps the write mask but drops saturation */
      i915_emit_arith(p, A0_MAD, tmp, flags & A0_DEST_CHANNEL_ALL, 0, src1,
                      src0, src2);

      i915_emit_arith(p, A0_MAD, get_result_vector(p, &inst->Dst[0]), flags, 0,
                      negate(src2, 1, 1, 1, 1), src0, tmp);
      break;

   case TGSI_OPCODE_MAD:
      emit_simple_arith(p, inst, A0_MAD, 3, fs);
      break;

   case TGSI_OPCODE_MAX:
      emit_simple_arith(p, inst, A0_MAX, 2, fs);
      break;

   case TGSI_OPCODE_MIN:
      emit_simple_arith(p, inst, A0_MIN, 2, fs);
      break;

   case TGSI_OPCODE_MOV:
      emit_simple_arith(p, inst, A0_MOV, 1, fs);
      break;

   case TGSI_OPCODE_MUL:
      emit_simple_arith(p, inst, A0_MUL, 2, fs);
      break;

   case TGSI_OPCODE_NOP:
      break;

   case TGSI_OPCODE_POW:
      /* emitted as EXP(LOG(src0.x) * src1.x), with the intermediate
       * kept in tmp.x
       */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);
      flags = get_result_flags(inst);

      /* XXX: masking on intermediate values, here and elsewhere.
       */
      i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_X, 0,
                      swizzle(src0, X, X, X, X), 0, 0);

      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);

      i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]), flags, 0,
                      swizzle(tmp, X, X, X, X), 0, 0);
      break;

   case TGSI_OPCODE_RET:
      /* XXX: no-op? */
      break;

   case TGSI_OPCODE_RCP:
      src0 = src_vector(p, &inst->Src[0], fs);

      /* scalar op: the x channel of the source is replicated */
      i915_emit_arith(p, A0_RCP, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_RSQ:
      src0 = src_vector(p, &inst->Src[0], fs);

      /* scalar op: the x channel of the source is replicated */
      i915_emit_arith(p, A0_RSQ, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_SEQ:
      /* if we're both >= and <= then we're == */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);

      /* tmp = (src0 >= src1) */
      i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);

      /* dst = (src1 >= src0) */
      i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, src1, src0, 0);

      /* dst = dst * tmp; note that the destination is read back here */
      i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      get_result_vector(p, &inst->Dst[0]), tmp, 0);

      break;

   case TGSI_OPCODE_SGE:
      emit_simple_arith(p, inst, A0_SGE, 2, fs);
      break;

   case TGSI_OPCODE_SLE:
      /* like SGE, but swap reg0, reg1 */
      emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);
      break;

   case TGSI_OPCODE_SLT:
      emit_simple_arith(p, inst, A0_SLT, 2, fs);
      break;

   case TGSI_OPCODE_SGT:
      /* like SLT, but swap reg0, reg1 */
      emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
      break;

   case TGSI_OPCODE_SNE:
      /* if we're < or > then we're != */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);

      /* tmp = (src0 < src1) */
      i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);

      /* dst = (src1 < src0) */
      i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, src1, src0, 0);

      /* dst = dst + tmp; note that the destination is read back here */
      i915_emit_arith(p, A0_ADD, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      get_result_vector(p, &inst->Dst[0]), tmp, 0);
      break;

   case TGSI_OPCODE_SSG:
      /* compute (src>0) - (src<0) */
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      /* tmp = (src < 0) */
      i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0);

      /* dst = (0 < src) */
      i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), src0, 0);

      /* dst = dst - tmp; note that the destination is read back here */
      i915_emit_arith(
         p, A0_ADD, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),
         0, get_result_vector(p, &inst->Dst[0]), negate(tmp, 1, 1, 1, 1), 0);
      break;

   case TGSI_OPCODE_TEX:
      emit_tex(p, inst, T0_TEXLD, fs);
      break;

   case TGSI_OPCODE_TRUNC:
      emit_simple_arith(p, inst, A0_TRC, 1, fs);
      break;

   case TGSI_OPCODE_TXB:
      emit_tex(p, inst, T0_TEXLDB, fs);
      break;

   case TGSI_OPCODE_TXP:
      emit_tex(p, inst, T0_TEXLDP, fs);
      break;

   default:
      /* unsupported opcode: flag the error and bail out early */
      i915_program_error(p, "bad opcode %s (%d)",
                         tgsi_get_opcode_name(inst->Instruction.Opcode),
                         inst->Instruction.Opcode);
      return;
   }

   /* utility temps only live for the duration of one TGSI instruction */
   i915_release_utemps(p);
}
753 
754 static void
i915_translate_token(struct i915_fp_compile * p,const union i915_full_token * token,struct i915_fragment_shader * fs)755 i915_translate_token(struct i915_fp_compile *p,
756                      const union i915_full_token *token,
757                      struct i915_fragment_shader *fs)
758 {
759    struct i915_fragment_shader *ifs = p->shader;
760    switch (token->Token.Type) {
761    case TGSI_TOKEN_TYPE_PROPERTY:
762       /* Ignore properties where we only support one value. */
763       assert(token->FullProperty.Property.PropertyName ==
764                 TGSI_PROPERTY_FS_COORD_ORIGIN ||
765              token->FullProperty.Property.PropertyName ==
766                 TGSI_PROPERTY_FS_COORD_PIXEL_CENTER ||
767              token->FullProperty.Property.PropertyName ==
768                 TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);
769       break;
770 
771    case TGSI_TOKEN_TYPE_DECLARATION:
772       if (token->FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) {
773          if (token->FullDeclaration.Range.Last >= I915_MAX_CONSTANT) {
774             i915_program_error(p, "Exceeded %d max uniforms",
775                                I915_MAX_CONSTANT);
776          } else {
777             uint32_t i;
778             for (i = token->FullDeclaration.Range.First;
779                  i <= token->FullDeclaration.Range.Last; i++) {
780                ifs->constant_flags[i] = I915_CONSTFLAG_USER;
781                ifs->num_constants = MAX2(ifs->num_constants, i + 1);
782             }
783          }
784       } else if (token->FullDeclaration.Declaration.File ==
785                  TGSI_FILE_TEMPORARY) {
786          if (token->FullDeclaration.Range.Last >= I915_MAX_TEMPORARY) {
787             i915_program_error(p, "Exceeded %d max TGSI temps",
788                                I915_MAX_TEMPORARY);
789          } else {
790             uint32_t i;
791             for (i = token->FullDeclaration.Range.First;
792                  i <= token->FullDeclaration.Range.Last; i++) {
793                /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
794                p->temp_flag |= (1 << i); /* mark temp as used */
795             }
796          }
797       }
798       break;
799 
800    case TGSI_TOKEN_TYPE_IMMEDIATE: {
801       const struct tgsi_full_immediate *imm = &token->FullImmediate;
802       const uint32_t pos = p->num_immediates++;
803       uint32_t j;
804       assert(imm->Immediate.NrTokens <= 4 + 1);
805       for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
806          p->immediates[pos][j] = imm->u[j].Float;
807       }
808    } break;
809 
810    case TGSI_TOKEN_TYPE_INSTRUCTION:
811       if (p->first_instruction) {
812          /* resolve location of immediates */
813          uint32_t i, j;
814          for (i = 0; i < p->num_immediates; i++) {
815             /* find constant slot for this immediate */
816             for (j = 0; j < I915_MAX_CONSTANT; j++) {
817                if (ifs->constant_flags[j] == 0x0) {
818                   memcpy(ifs->constants[j], p->immediates[i],
819                          4 * sizeof(float));
820                   /*printf("immediate %d maps to const %d\n", i, j);*/
821                   ifs->constant_flags[j] = 0xf; /* all four comps used */
822                   p->immediates_map[i] = j;
823                   ifs->num_constants = MAX2(ifs->num_constants, j + 1);
824                   break;
825                }
826             }
827             if (j == I915_MAX_CONSTANT) {
828                i915_program_error(p, "Exceeded %d max uniforms and immediates.",
829                                   I915_MAX_CONSTANT);
830             }
831          }
832 
833          p->first_instruction = false;
834       }
835 
836       i915_translate_instruction(p, &token->FullInstruction, fs);
837       break;
838 
839    default:
840       assert(0);
841    }
842 }
843 
844 /**
845  * Translate TGSI fragment shader into i915 hardware instructions.
846  * \param p  the translation state
847  * \param tokens  the TGSI token array
848  */
849 static void
i915_translate_instructions(struct i915_fp_compile * p,const struct i915_token_list * tokens,struct i915_fragment_shader * fs)850 i915_translate_instructions(struct i915_fp_compile *p,
851                             const struct i915_token_list *tokens,
852                             struct i915_fragment_shader *fs)
853 {
854    int i;
855    for (i = 0; i < tokens->NumTokens && !p->error; i++) {
856       i915_translate_token(p, &tokens->Tokens[i], fs);
857    }
858 }
859 
860 static struct i915_fp_compile *
i915_init_compile(struct i915_context * i915,struct i915_fragment_shader * ifs)861 i915_init_compile(struct i915_context *i915, struct i915_fragment_shader *ifs)
862 {
863    struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
864    int i;
865 
866    p->shader = ifs;
867 
868    /* Put new constants at end of const buffer, growing downward.
869     * The problem is we don't know how many user-defined constants might
870     * be specified with pipe->set_constant_buffer().
871     * Should pre-scan the user's program to determine the highest-numbered
872     * constant referenced.
873     */
874    ifs->num_constants = 0;
875    memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
876 
877    memset(&p->register_phases, 0, sizeof(p->register_phases));
878 
879    for (i = 0; i < I915_TEX_UNITS; i++)
880       ifs->texcoords[i].semantic = -1;
881 
882    p->log_program_errors = !i915->no_log_program_errors;
883 
884    p->first_instruction = true;
885 
886    p->nr_tex_indirect = 1; /* correct? */
887    p->nr_tex_insn = 0;
888    p->nr_alu_insn = 0;
889    p->nr_decl_insn = 0;
890 
891    p->csr = p->program;
892    p->decl = p->declarations;
893    p->decl_s = 0;
894    p->decl_t = 0;
895    p->temp_flag = ~0x0 << I915_MAX_TEMPORARY;
896    p->utemp_flag = ~0x7;
897 
898    /* initialize the first program word */
899    *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
900 
901    return p;
902 }
903 
904 /* Copy compile results to the fragment program struct and destroy the
905  * compilation context.
906  */
907 static void
i915_fini_compile(struct i915_context * i915,struct i915_fp_compile * p)908 i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
909 {
910    struct i915_fragment_shader *ifs = p->shader;
911    unsigned long program_size = (unsigned long)(p->csr - p->program);
912    unsigned long decl_size = (unsigned long)(p->decl - p->declarations);
913 
914    if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
915       debug_printf("Exceeded max nr indirect texture lookups\n");
916 
917    if (p->nr_tex_insn > I915_MAX_TEX_INSN)
918       i915_program_error(p, "Exceeded max TEX instructions");
919 
920    if (p->nr_alu_insn > I915_MAX_ALU_INSN)
921       i915_program_error(p, "Exceeded max ALU instructions");
922 
923    if (p->nr_decl_insn > I915_MAX_DECL_INSN)
924       i915_program_error(p, "Exceeded max DECL instructions");
925 
926    /* hw doesn't seem to like empty frag programs (num_instructions == 1 is just
927     * TGSI_END), even when the depth write fixup gets emitted below - maybe that
928     * one is fishy, too?
929     */
930    if (ifs->info.num_instructions == 1)
931       i915_program_error(p, "Empty fragment shader");
932 
933    if (p->error) {
934       p->NumNativeInstructions = 0;
935       p->NumNativeAluInstructions = 0;
936       p->NumNativeTexInstructions = 0;
937       p->NumNativeTexIndirections = 0;
938 
939       i915_use_passthrough_shader(ifs);
940    } else {
941       p->NumNativeInstructions =
942          p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn;
943       p->NumNativeAluInstructions = p->nr_alu_insn;
944       p->NumNativeTexInstructions = p->nr_tex_insn;
945       p->NumNativeTexIndirections = p->nr_tex_indirect;
946 
947       /* patch in the program length */
948       p->declarations[0] |= program_size + decl_size - 2;
949 
950       /* Copy compilation results to fragment program struct:
951        */
952       assert(!ifs->program);
953 
954       ifs->program_len = decl_size + program_size;
955       ifs->program = (uint32_t *)MALLOC(ifs->program_len * sizeof(uint32_t));
956       memcpy(ifs->program, p->declarations, decl_size * sizeof(uint32_t));
957       memcpy(&ifs->program[decl_size], p->program,
958              program_size * sizeof(uint32_t));
959 
960       pipe_debug_message(
961          &i915->debug, SHADER_INFO,
962          "%s shader: %d inst, %d tex, %d tex_indirect, %d const",
963          _mesa_shader_stage_to_abbrev(MESA_SHADER_FRAGMENT), (int)program_size,
964          p->nr_tex_insn, p->nr_tex_indirect, ifs->num_constants);
965    }
966 
967    /* Release the compilation struct:
968     */
969    FREE(p);
970 }
971 
972 /**
973  * Rather than trying to intercept and jiggle depth writes during
974  * emit, just move the value into its correct position at the end of
975  * the program:
976  */
977 static void
i915_fixup_depth_write(struct i915_fp_compile * p)978 i915_fixup_depth_write(struct i915_fp_compile *p)
979 {
980    for (int i = 0; i < p->shader->info.num_outputs; i++) {
981       if (p->shader->info.output_semantic_name[i] != TGSI_SEMANTIC_POSITION)
982          continue;
983 
984       const uint32_t depth = UREG(REG_TYPE_OD, 0);
985 
986       i915_emit_arith(p, A0_MOV,                  /* opcode */
987                       depth,                      /* dest reg */
988                       A0_DEST_CHANNEL_W,          /* write mask */
989                       0,                          /* saturate? */
990                       swizzle(depth, X, Y, Z, Z), /* src0 */
991                       0, 0 /* src1, src2 */);
992    }
993 }
994 
995 void
i915_translate_fragment_program(struct i915_context * i915,struct i915_fragment_shader * fs)996 i915_translate_fragment_program(struct i915_context *i915,
997                                 struct i915_fragment_shader *fs)
998 {
999    struct i915_fp_compile *p;
1000    const struct tgsi_token *tokens = fs->state.tokens;
1001    struct i915_token_list *i_tokens;
1002 
1003    if (I915_DBG_ON(DBG_FS)) {
1004       mesa_logi("TGSI fragment shader:");
1005       tgsi_dump(tokens, 0);
1006    }
1007 
1008    p = i915_init_compile(i915, fs);
1009 
1010    i_tokens = i915_optimize(tokens);
1011    i915_translate_instructions(p, i_tokens, fs);
1012    i915_fixup_depth_write(p);
1013 
1014    i915_fini_compile(i915, p);
1015    i915_optimize_free(i_tokens);
1016 
1017    if (I915_DBG_ON(DBG_FS)) {
1018       mesa_logi("i915 fragment shader with %d constants%s", fs->num_constants,
1019                 fs->num_constants ? ":" : "");
1020 
1021       for (int i = 0; i < I915_MAX_CONSTANT; i++) {
1022          if (fs->constant_flags[i] &&
1023              fs->constant_flags[i] != I915_CONSTFLAG_USER) {
1024             mesa_logi("\t\tC[%d] = { %f, %f, %f, %f }", i, fs->constants[i][0],
1025                       fs->constants[i][1], fs->constants[i][2],
1026                       fs->constants[i][3]);
1027          }
1028       }
1029       i915_disassemble_program(fs->program, fs->program_len);
1030    }
1031 }
1032