1 /**************************************************************************
2  *
3  * Copyright 2011 The Chromium OS authors.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include "i915_context.h"
29 #include "i915_fpc.h"
30 #include "i915_reg.h"
31 
32 #include "pipe/p_shader_tokens.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "tgsi/tgsi_exec.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_string.h"
39 
40 struct i915_optimize_context {
41    int first_write[TGSI_EXEC_NUM_TEMPS];
42    int last_read[TGSI_EXEC_NUM_TEMPS];
43 };
44 
45 static bool
same_src_dst_reg(struct i915_full_src_register * s1,struct i915_full_dst_register * d1)46 same_src_dst_reg(struct i915_full_src_register *s1,
47                  struct i915_full_dst_register *d1)
48 {
49    return (s1->Register.File == d1->Register.File &&
50            s1->Register.Indirect == d1->Register.Indirect &&
51            s1->Register.Dimension == d1->Register.Dimension &&
52            s1->Register.Index == d1->Register.Index);
53 }
54 
55 static bool
same_dst_reg(struct i915_full_dst_register * d1,struct i915_full_dst_register * d2)56 same_dst_reg(struct i915_full_dst_register *d1,
57              struct i915_full_dst_register *d2)
58 {
59    return (d1->Register.File == d2->Register.File &&
60            d1->Register.Indirect == d2->Register.Indirect &&
61            d1->Register.Dimension == d2->Register.Dimension &&
62            d1->Register.Index == d2->Register.Index);
63 }
64 
65 static bool
same_src_reg(struct i915_full_src_register * d1,struct i915_full_src_register * d2)66 same_src_reg(struct i915_full_src_register *d1,
67              struct i915_full_src_register *d2)
68 {
69    return (d1->Register.File == d2->Register.File &&
70            d1->Register.Indirect == d2->Register.Indirect &&
71            d1->Register.Dimension == d2->Register.Dimension &&
72            d1->Register.Index == d2->Register.Index &&
73            d1->Register.Absolute == d2->Register.Absolute &&
74            d1->Register.Negate == d2->Register.Negate);
75 }
76 
77 static const struct {
78    bool is_texture;
79    bool commutes;
80    unsigned neutral_element;
81    unsigned num_dst;
82    unsigned num_src;
83 } op_table[TGSI_OPCODE_LAST] = {
84    [TGSI_OPCODE_ADD] = {false, true, TGSI_SWIZZLE_ZERO, 1, 2},
85    [TGSI_OPCODE_CEIL] = {false, false, 0, 1, 1},
86    [TGSI_OPCODE_CMP] = {false, false, 0, 1, 2},
87    [TGSI_OPCODE_COS] = {false, false, 0, 1, 1},
88    [TGSI_OPCODE_DDX] = {false, false, 0, 1, 0},
89    [TGSI_OPCODE_DDY] = {false, false, 0, 1, 0},
90    [TGSI_OPCODE_DP2] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
91    [TGSI_OPCODE_DP3] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
92    [TGSI_OPCODE_DP4] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
93    [TGSI_OPCODE_DST] = {false, false, 0, 1, 2},
94    [TGSI_OPCODE_END] = {false, false, 0, 0, 0},
95    [TGSI_OPCODE_EX2] = {false, false, 0, 1, 1},
96    [TGSI_OPCODE_FLR] = {false, false, 0, 1, 1},
97    [TGSI_OPCODE_FRC] = {false, false, 0, 1, 1},
98    [TGSI_OPCODE_KILL_IF] = {false, false, 0, 0, 1},
99    [TGSI_OPCODE_KILL] = {false, false, 0, 0, 0},
100    [TGSI_OPCODE_LG2] = {false, false, 0, 1, 1},
101    [TGSI_OPCODE_LIT] = {false, false, 0, 1, 1},
102    [TGSI_OPCODE_LRP] = {false, false, 0, 1, 3},
103    [TGSI_OPCODE_MAX] = {false, false, 0, 1, 2},
104    [TGSI_OPCODE_MAD] = {false, false, 0, 1, 3},
105    [TGSI_OPCODE_MIN] = {false, false, 0, 1, 2},
106    [TGSI_OPCODE_MOV] = {false, false, 0, 1, 1},
107    [TGSI_OPCODE_MUL] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
108    [TGSI_OPCODE_NOP] = {false, false, 0, 0, 0},
109    [TGSI_OPCODE_POW] = {false, false, 0, 1, 2},
110    [TGSI_OPCODE_RCP] = {false, false, 0, 1, 1},
111    [TGSI_OPCODE_RET] = {false, false, 0, 0, 0},
112    [TGSI_OPCODE_RSQ] = {false, false, 0, 1, 1},
113    [TGSI_OPCODE_SEQ] = {false, false, 0, 1, 2},
114    [TGSI_OPCODE_SGE] = {false, false, 0, 1, 2},
115    [TGSI_OPCODE_SGT] = {false, false, 0, 1, 2},
116    [TGSI_OPCODE_SIN] = {false, false, 0, 1, 1},
117    [TGSI_OPCODE_SLE] = {false, false, 0, 1, 2},
118    [TGSI_OPCODE_SLT] = {false, false, 0, 1, 2},
119    [TGSI_OPCODE_SNE] = {false, false, 0, 1, 2},
120    [TGSI_OPCODE_SSG] = {false, false, 0, 1, 1},
121    [TGSI_OPCODE_TEX] = {true, false, 0, 1, 2},
122    [TGSI_OPCODE_TRUNC] = {false, false, 0, 1, 1},
123    [TGSI_OPCODE_TXB] = {true, false, 0, 1, 2},
124    [TGSI_OPCODE_TXP] = {true, false, 0, 1, 2},
125 };
126 
127 static bool
op_has_dst(unsigned opcode)128 op_has_dst(unsigned opcode)
129 {
130    return (op_table[opcode].num_dst > 0);
131 }
132 
133 static int
op_num_dst(unsigned opcode)134 op_num_dst(unsigned opcode)
135 {
136    return op_table[opcode].num_dst;
137 }
138 
139 static int
op_num_src(unsigned opcode)140 op_num_src(unsigned opcode)
141 {
142    return op_table[opcode].num_src;
143 }
144 
145 static bool
op_commutes(unsigned opcode)146 op_commutes(unsigned opcode)
147 {
148    return op_table[opcode].commutes;
149 }
150 
151 static unsigned
mask_for_unswizzled(int num_components)152 mask_for_unswizzled(int num_components)
153 {
154    unsigned mask = 0;
155    switch (num_components) {
156    case 4:
157       mask |= TGSI_WRITEMASK_W;
158       FALLTHROUGH;
159    case 3:
160       mask |= TGSI_WRITEMASK_Z;
161       FALLTHROUGH;
162    case 2:
163       mask |= TGSI_WRITEMASK_Y;
164       FALLTHROUGH;
165    case 1:
166       mask |= TGSI_WRITEMASK_X;
167    }
168    return mask;
169 }
170 
171 static bool
is_unswizzled(struct i915_full_src_register * r,unsigned write_mask)172 is_unswizzled(struct i915_full_src_register *r, unsigned write_mask)
173 {
174    if (write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
175       return false;
176    if (write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
177       return false;
178    if (write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
179       return false;
180    if (write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
181       return false;
182    return true;
183 }
184 
185 static bool
op_is_texture(unsigned opcode)186 op_is_texture(unsigned opcode)
187 {
188    return op_table[opcode].is_texture;
189 }
190 
191 static unsigned
op_neutral_element(unsigned opcode)192 op_neutral_element(unsigned opcode)
193 {
194    unsigned ne = op_table[opcode].neutral_element;
195    if (!ne) {
196       debug_printf("No neutral element for opcode %d\n", opcode);
197       ne = TGSI_SWIZZLE_ZERO;
198    }
199    return ne;
200 }
201 
202 /*
203  * Sets the swizzle to the neutral element for the operation for the bits
204  * of writemask which are set, swizzle to identity otherwise.
205  */
206 static void
set_neutral_element_swizzle(struct i915_full_src_register * r,unsigned write_mask,unsigned neutral)207 set_neutral_element_swizzle(struct i915_full_src_register *r,
208                             unsigned write_mask, unsigned neutral)
209 {
210    if (write_mask & TGSI_WRITEMASK_X)
211       r->Register.SwizzleX = neutral;
212    else
213       r->Register.SwizzleX = TGSI_SWIZZLE_X;
214 
215    if (write_mask & TGSI_WRITEMASK_Y)
216       r->Register.SwizzleY = neutral;
217    else
218       r->Register.SwizzleY = TGSI_SWIZZLE_Y;
219 
220    if (write_mask & TGSI_WRITEMASK_Z)
221       r->Register.SwizzleZ = neutral;
222    else
223       r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
224 
225    if (write_mask & TGSI_WRITEMASK_W)
226       r->Register.SwizzleW = neutral;
227    else
228       r->Register.SwizzleW = TGSI_SWIZZLE_W;
229 }
230 
231 static void
copy_src_reg(struct i915_src_register * o,const struct tgsi_src_register * i)232 copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
233 {
234    o->File = i->File;
235    o->Indirect = i->Indirect;
236    o->Dimension = i->Dimension;
237    o->Index = i->Index;
238    o->SwizzleX = i->SwizzleX;
239    o->SwizzleY = i->SwizzleY;
240    o->SwizzleZ = i->SwizzleZ;
241    o->SwizzleW = i->SwizzleW;
242    o->Absolute = i->Absolute;
243    o->Negate = i->Negate;
244 }
245 
246 static void
copy_dst_reg(struct i915_dst_register * o,const struct tgsi_dst_register * i)247 copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
248 {
249    o->File = i->File;
250    o->WriteMask = i->WriteMask;
251    o->Indirect = i->Indirect;
252    o->Dimension = i->Dimension;
253    o->Index = i->Index;
254 }
255 
256 static void
copy_instruction(struct i915_full_instruction * o,const struct tgsi_full_instruction * i)257 copy_instruction(struct i915_full_instruction *o,
258                  const struct tgsi_full_instruction *i)
259 {
260    memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
261    memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
262 
263    copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
264 
265    copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
266    copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
267    copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
268 }
269 
270 static void
copy_token(union i915_full_token * o,union tgsi_full_token * i)271 copy_token(union i915_full_token *o, union tgsi_full_token *i)
272 {
273    if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
274       memcpy(o, i, sizeof(*o));
275    else
276       copy_instruction(&o->FullInstruction, &i->FullInstruction);
277 }
278 
279 static void
liveness_mark_written(struct i915_optimize_context * ctx,struct i915_full_dst_register * dst_reg,int pos)280 liveness_mark_written(struct i915_optimize_context *ctx,
281                       struct i915_full_dst_register *dst_reg, int pos)
282 {
283    int dst_reg_index;
284    if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
285       dst_reg_index = dst_reg->Register.Index;
286       assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
287       /* dead -> live transition */
288       if (ctx->first_write[dst_reg_index] != -1)
289          ctx->first_write[dst_reg_index] = pos;
290    }
291 }
292 
293 static void
liveness_mark_read(struct i915_optimize_context * ctx,struct i915_full_src_register * src_reg,int pos)294 liveness_mark_read(struct i915_optimize_context *ctx,
295                    struct i915_full_src_register *src_reg, int pos)
296 {
297    int src_reg_index;
298    if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
299       src_reg_index = src_reg->Register.Index;
300       assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
301       /* live -> dead transition */
302       if (ctx->last_read[src_reg_index] != -1)
303          ctx->last_read[src_reg_index] = pos;
304    }
305 }
306 
307 static void
liveness_analysis(struct i915_optimize_context * ctx,struct i915_token_list * tokens)308 liveness_analysis(struct i915_optimize_context *ctx,
309                   struct i915_token_list *tokens)
310 {
311    struct i915_full_dst_register *dst_reg;
312    struct i915_full_src_register *src_reg;
313    union i915_full_token *current;
314    unsigned opcode;
315    int num_dst, num_src;
316    int i = 0;
317 
318    for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) {
319       ctx->first_write[i] = -1;
320       ctx->last_read[i] = -1;
321    }
322 
323    for (i = 0; i < tokens->NumTokens; i++) {
324       current = &tokens->Tokens[i];
325 
326       if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
327          continue;
328 
329       opcode = current->FullInstruction.Instruction.Opcode;
330       num_dst = op_num_dst(opcode);
331 
332       switch (num_dst) {
333       case 1:
334          dst_reg = &current->FullInstruction.Dst[0];
335          liveness_mark_written(ctx, dst_reg, i);
336       case 0:
337          break;
338       default:
339          debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
340          break;
341       }
342    }
343 
344    for (i = tokens->NumTokens - 1; i >= 0; i--) {
345       current = &tokens->Tokens[i];
346 
347       if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
348          continue;
349 
350       opcode = current->FullInstruction.Instruction.Opcode;
351       num_src = op_num_src(opcode);
352 
353       switch (num_src) {
354       case 3:
355          src_reg = &current->FullInstruction.Src[2];
356          liveness_mark_read(ctx, src_reg, i);
357          FALLTHROUGH;
358       case 2:
359          src_reg = &current->FullInstruction.Src[1];
360          liveness_mark_read(ctx, src_reg, i);
361          FALLTHROUGH;
362       case 1:
363          src_reg = &current->FullInstruction.Src[0];
364          liveness_mark_read(ctx, src_reg, i);
365          FALLTHROUGH;
366       case 0:
367          break;
368       default:
369          debug_printf("Op %d has %d src regs\n", opcode, num_src);
370          break;
371       }
372    }
373 }
374 
375 static int
unused_from(struct i915_optimize_context * ctx,struct i915_full_dst_register * dst_reg,int from)376 unused_from(struct i915_optimize_context *ctx,
377             struct i915_full_dst_register *dst_reg, int from)
378 {
379    int dst_reg_index = dst_reg->Register.Index;
380    assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
381    return (from >= ctx->last_read[dst_reg_index]);
382 }
383 
384 /* Returns a mask with the components used for a texture access instruction */
385 static unsigned
i915_tex_mask(union i915_full_token * instr)386 i915_tex_mask(union i915_full_token *instr)
387 {
388    unsigned mask;
389 
390    /* Get the number of coords */
391    mask = mask_for_unswizzled(
392       i915_num_coords(instr->FullInstruction.Texture.Texture));
393 
394    /* Add the W component if projective */
395    if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP)
396       mask |= TGSI_WRITEMASK_W;
397 
398    return mask;
399 }
400 
401 static bool
target_is_texture2d(uint32_t tex)402 target_is_texture2d(uint32_t tex)
403 {
404    switch (tex) {
405    case TGSI_TEXTURE_2D:
406    case TGSI_TEXTURE_RECT:
407       return true;
408    default:
409       return false;
410    }
411 }
412 
413 /*
414  * Optimize away useless indirect texture reads:
415  *    MOV TEMP[0].xy, IN[0].xyyy
416  *    TEX TEMP[1], TEMP[0], SAMP[0], 2D
417  * into:
418  *    TEX TEMP[1], IN[0], SAMP[0], 2D
419  *
420  * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
421  */
422 static void
i915_fpc_optimize_mov_before_tex(struct i915_optimize_context * ctx,struct i915_token_list * tokens,int index)423 i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
424                                  struct i915_token_list *tokens, int index)
425 {
426    union i915_full_token *current = &tokens->Tokens[index - 1];
427    union i915_full_token *next = &tokens->Tokens[index];
428 
429    if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
430        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
431        current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
432        op_is_texture(next->FullInstruction.Instruction.Opcode) &&
433        target_is_texture2d(next->FullInstruction.Texture.Texture) &&
434        same_src_dst_reg(&next->FullInstruction.Src[0],
435                         &current->FullInstruction.Dst[0]) &&
436        is_unswizzled(&current->FullInstruction.Src[0], i915_tex_mask(next)) &&
437        unused_from(ctx, &current->FullInstruction.Dst[0], index)) {
438       memcpy(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0],
439              sizeof(struct i915_src_register));
440       current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
441    }
442 }
443 
444 /*
445  * Optimize away things like:
446  *    MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
447  *    MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
448  * into:
449  *    NOP
450  *    MOV OUT[0].xyw, TEMP[1].xyww
451  */
452 static void
i915_fpc_optimize_mov_after_mov(union i915_full_token * current,union i915_full_token * next)453 i915_fpc_optimize_mov_after_mov(union i915_full_token *current,
454                                 union i915_full_token *next)
455 {
456    struct i915_full_src_register *src_reg1, *src_reg2;
457    struct i915_full_dst_register *dst_reg1, *dst_reg2;
458    unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
459 
460    if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
461        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
462        current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
463        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
464        current->FullInstruction.Instruction.Saturate ==
465           next->FullInstruction.Instruction.Saturate &&
466        same_dst_reg(&next->FullInstruction.Dst[0],
467                     &current->FullInstruction.Dst[0]) &&
468        same_src_reg(&next->FullInstruction.Src[0],
469                     &current->FullInstruction.Src[0]) &&
470        !same_src_dst_reg(&current->FullInstruction.Src[0],
471                          &current->FullInstruction.Dst[0])) {
472       src_reg1 = &current->FullInstruction.Src[0];
473       dst_reg1 = &current->FullInstruction.Dst[0];
474       src_reg2 = &next->FullInstruction.Src[0];
475       dst_reg2 = &next->FullInstruction.Dst[0];
476 
477       /* Start with swizzles from the first mov */
478       swizzle_x = src_reg1->Register.SwizzleX;
479       swizzle_y = src_reg1->Register.SwizzleY;
480       swizzle_z = src_reg1->Register.SwizzleZ;
481       swizzle_w = src_reg1->Register.SwizzleW;
482 
483       /* Pile the second mov on top */
484       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
485          swizzle_x = src_reg2->Register.SwizzleX;
486       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
487          swizzle_y = src_reg2->Register.SwizzleY;
488       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
489          swizzle_z = src_reg2->Register.SwizzleZ;
490       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
491          swizzle_w = src_reg2->Register.SwizzleW;
492 
493       dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
494       src_reg2->Register.SwizzleX = swizzle_x;
495       src_reg2->Register.SwizzleY = swizzle_y;
496       src_reg2->Register.SwizzleZ = swizzle_z;
497       src_reg2->Register.SwizzleW = swizzle_w;
498 
499       current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
500 
501       return;
502    }
503 }
504 
505 /*
506  * Optimize away things like:
507  *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
508  *    MOV OUT[0].w, TEMP[2]
509  * into:
510  *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
511  * This is useful for optimizing texenv.
512  */
513 static void
i915_fpc_optimize_mov_after_alu(union i915_full_token * current,union i915_full_token * next)514 i915_fpc_optimize_mov_after_alu(union i915_full_token *current,
515                                 union i915_full_token *next)
516 {
517    if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
518        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
519        op_commutes(current->FullInstruction.Instruction.Opcode) &&
520        current->FullInstruction.Instruction.Saturate ==
521           next->FullInstruction.Instruction.Saturate &&
522        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
523        same_dst_reg(&next->FullInstruction.Dst[0],
524                     &current->FullInstruction.Dst[0]) &&
525        same_src_reg(&next->FullInstruction.Src[0],
526                     &current->FullInstruction.Src[1]) &&
527        !same_src_dst_reg(&next->FullInstruction.Src[0],
528                          &current->FullInstruction.Dst[0]) &&
529        is_unswizzled(&current->FullInstruction.Src[0],
530                      current->FullInstruction.Dst[0].Register.WriteMask) &&
531        is_unswizzled(&current->FullInstruction.Src[1],
532                      current->FullInstruction.Dst[0].Register.WriteMask) &&
533        is_unswizzled(&next->FullInstruction.Src[0],
534                      next->FullInstruction.Dst[0].Register.WriteMask)) {
535       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
536 
537       set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
538       set_neutral_element_swizzle(
539          &current->FullInstruction.Src[0],
540          next->FullInstruction.Dst[0].Register.WriteMask,
541          op_neutral_element(current->FullInstruction.Instruction.Opcode));
542 
543       current->FullInstruction.Dst[0].Register.WriteMask =
544          current->FullInstruction.Dst[0].Register.WriteMask |
545          next->FullInstruction.Dst[0].Register.WriteMask;
546       return;
547    }
548 
549    if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
550        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
551        op_commutes(current->FullInstruction.Instruction.Opcode) &&
552        current->FullInstruction.Instruction.Saturate ==
553           next->FullInstruction.Instruction.Saturate &&
554        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
555        same_dst_reg(&next->FullInstruction.Dst[0],
556                     &current->FullInstruction.Dst[0]) &&
557        same_src_reg(&next->FullInstruction.Src[0],
558                     &current->FullInstruction.Src[0]) &&
559        !same_src_dst_reg(&next->FullInstruction.Src[0],
560                          &current->FullInstruction.Dst[0]) &&
561        is_unswizzled(&current->FullInstruction.Src[0],
562                      current->FullInstruction.Dst[0].Register.WriteMask) &&
563        is_unswizzled(&current->FullInstruction.Src[1],
564                      current->FullInstruction.Dst[0].Register.WriteMask) &&
565        is_unswizzled(&next->FullInstruction.Src[0],
566                      next->FullInstruction.Dst[0].Register.WriteMask)) {
567       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
568 
569       set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
570       set_neutral_element_swizzle(
571          &current->FullInstruction.Src[1],
572          next->FullInstruction.Dst[0].Register.WriteMask,
573          op_neutral_element(current->FullInstruction.Instruction.Opcode));
574 
575       current->FullInstruction.Dst[0].Register.WriteMask =
576          current->FullInstruction.Dst[0].Register.WriteMask |
577          next->FullInstruction.Dst[0].Register.WriteMask;
578       return;
579    }
580 }
581 
582 /*
583  * Optimize away things like:
584  *    MOV TEMP[0].xyz TEMP[0].xyzx
585  * into:
586  *    NOP
587  */
588 static bool
i915_fpc_useless_mov(union tgsi_full_token * tgsi_current)589 i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
590 {
591    union i915_full_token current;
592    copy_token(&current, tgsi_current);
593    if (current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
594        current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
595        op_has_dst(current.FullInstruction.Instruction.Opcode) &&
596        !current.FullInstruction.Instruction.Saturate &&
597        current.FullInstruction.Src[0].Register.Absolute == 0 &&
598        current.FullInstruction.Src[0].Register.Negate == 0 &&
599        is_unswizzled(&current.FullInstruction.Src[0],
600                      current.FullInstruction.Dst[0].Register.WriteMask) &&
601        same_src_dst_reg(&current.FullInstruction.Src[0],
602                         &current.FullInstruction.Dst[0])) {
603       return true;
604    }
605    return false;
606 }
607 
608 /*
609  * Optimize away things like:
610  *    *** TEMP[0], TEMP[1], TEMP[2]
611  *    MOV OUT[0] TEMP[0]
612  * into:
613  *    *** OUT[0], TEMP[1], TEMP[2]
614  */
615 static void
i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context * ctx,struct i915_token_list * tokens,int index)616 i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
617                                          struct i915_token_list *tokens,
618                                          int index)
619 {
620    union i915_full_token *current = &tokens->Tokens[index - 1];
621    union i915_full_token *next = &tokens->Tokens[index];
622 
623    // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
624    if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
625        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
626        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
627        op_has_dst(current->FullInstruction.Instruction.Opcode) &&
628        !next->FullInstruction.Instruction.Saturate &&
629        next->FullInstruction.Src[0].Register.Absolute == 0 &&
630        next->FullInstruction.Src[0].Register.Negate == 0 &&
631        unused_from(ctx, &current->FullInstruction.Dst[0], index) &&
632        current->FullInstruction.Dst[0].Register.WriteMask ==
633           TGSI_WRITEMASK_XYZW &&
634        is_unswizzled(&next->FullInstruction.Src[0],
635                      next->FullInstruction.Dst[0].Register.WriteMask) &&
636        current->FullInstruction.Dst[0].Register.WriteMask ==
637           next->FullInstruction.Dst[0].Register.WriteMask &&
638        same_src_dst_reg(&next->FullInstruction.Src[0],
639                         &current->FullInstruction.Dst[0])) {
640       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
641 
642       current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
643       return;
644    }
645 }
646 
647 struct i915_token_list *
i915_optimize(const struct tgsi_token * tokens)648 i915_optimize(const struct tgsi_token *tokens)
649 {
650    struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
651    struct tgsi_parse_context parse;
652    struct i915_optimize_context *ctx;
653    int i = 0;
654 
655    ctx = malloc(sizeof(*ctx));
656 
657    out_tokens->NumTokens = 0;
658 
659    /* Count the tokens */
660    tgsi_parse_init(&parse, tokens);
661    while (!tgsi_parse_end_of_tokens(&parse)) {
662       tgsi_parse_token(&parse);
663       out_tokens->NumTokens++;
664    }
665    tgsi_parse_free(&parse);
666 
667    /* Allocate our tokens */
668    out_tokens->Tokens =
669       MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
670 
671    tgsi_parse_init(&parse, tokens);
672    while (!tgsi_parse_end_of_tokens(&parse)) {
673       tgsi_parse_token(&parse);
674 
675       if (i915_fpc_useless_mov(&parse.FullToken)) {
676          out_tokens->NumTokens--;
677          continue;
678       }
679 
680       copy_token(&out_tokens->Tokens[i], &parse.FullToken);
681 
682       i++;
683    }
684    tgsi_parse_free(&parse);
685 
686    liveness_analysis(ctx, out_tokens);
687 
688    i = 1;
689    while (i < out_tokens->NumTokens) {
690       i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
691       i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i - 1],
692                                       &out_tokens->Tokens[i]);
693       i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i - 1],
694                                       &out_tokens->Tokens[i]);
695       i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
696       i++;
697    }
698 
699    free(ctx);
700 
701    return out_tokens;
702 }
703 
704 void
i915_optimize_free(struct i915_token_list * tokens)705 i915_optimize_free(struct i915_token_list *tokens)
706 {
707    free(tokens->Tokens);
708    free(tokens);
709 }
710