1 /*
2  * Mesa 3-D graphics library
3  *
4  * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
20  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 
25 
26 #include "main/glheader.h"
27 #include "main/context.h"
28 #include "main/macros.h"
29 #include "program.h"
30 #include "prog_instruction.h"
31 #include "prog_optimize.h"
32 #include "prog_print.h"
33 
34 
35 #define MAX_LOOP_NESTING 50
36 /* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
37  * register allocate many temporary values into that small number of
38  * temps.  So allow large temporary indices coming into the register
39  * allocator.
40  */
41 #define REG_ALLOCATE_MAX_PROGRAM_TEMPS	((1 << INST_INDEX_BITS) - 1)
42 
43 static GLboolean dbg = GL_FALSE;
44 
45 #define NO_MASK 0xf
46 
47 /**
48  * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
49  * are read from the given src in this instruction, We also provide
50  * one optional masks which may mask other components in the dst
51  * register
52  */
53 static GLuint
get_src_arg_mask(const struct prog_instruction * inst,GLuint arg,GLuint dst_mask)54 get_src_arg_mask(const struct prog_instruction *inst,
55                  GLuint arg, GLuint dst_mask)
56 {
57    GLuint read_mask, channel_mask;
58    GLuint comp;
59 
60    assert(arg < _mesa_num_inst_src_regs(inst->Opcode));
61 
62    /* Form the dst register, find the written channels */
63    switch (inst->Opcode) {
64    case OPCODE_MOV:
65    case OPCODE_MIN:
66    case OPCODE_MAX:
67    case OPCODE_ABS:
68    case OPCODE_ADD:
69    case OPCODE_MAD:
70    case OPCODE_MUL:
71    case OPCODE_SUB:
72    case OPCODE_CMP:
73    case OPCODE_FLR:
74    case OPCODE_FRC:
75    case OPCODE_LRP:
76    case OPCODE_SGE:
77    case OPCODE_SLT:
78    case OPCODE_SSG:
79       channel_mask = inst->DstReg.WriteMask & dst_mask;
80       break;
81    case OPCODE_RCP:
82    case OPCODE_SIN:
83    case OPCODE_COS:
84    case OPCODE_RSQ:
85    case OPCODE_POW:
86    case OPCODE_EX2:
87    case OPCODE_LOG:
88       channel_mask = WRITEMASK_X;
89       break;
90    case OPCODE_DP2:
91       channel_mask = WRITEMASK_XY;
92       break;
93    case OPCODE_DP3:
94    case OPCODE_XPD:
95       channel_mask = WRITEMASK_XYZ;
96       break;
97    default:
98       channel_mask = WRITEMASK_XYZW;
99       break;
100    }
101 
102    /* Now, given the src swizzle and the written channels, find which
103     * components are actually read
104     */
105    read_mask = 0x0;
106    for (comp = 0; comp < 4; ++comp) {
107       const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
108       if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
109          read_mask |= 1 << coord;
110    }
111 
112    return read_mask;
113 }
114 
115 
116 /**
117  * For a MOV instruction, compute a write mask when src register also has
118  * a mask
119  */
120 static GLuint
get_dst_mask_for_mov(const struct prog_instruction * mov,GLuint src_mask)121 get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
122 {
123    const GLuint mask = mov->DstReg.WriteMask;
124    GLuint comp;
125    GLuint updated_mask = 0x0;
126 
127    assert(mov->Opcode == OPCODE_MOV);
128 
129    for (comp = 0; comp < 4; ++comp) {
130       GLuint src_comp;
131       if ((mask & (1 << comp)) == 0)
132          continue;
133       src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
134       if ((src_mask & (1 << src_comp)) == 0)
135          continue;
136       updated_mask |= 1 << comp;
137    }
138 
139    return updated_mask;
140 }
141 
142 
143 /**
144  * Ensure that the swizzle is regular.  That is, all of the swizzle
145  * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
146  */
147 static GLboolean
is_swizzle_regular(GLuint swz)148 is_swizzle_regular(GLuint swz)
149 {
150    return GET_SWZ(swz,0) <= SWIZZLE_W &&
151           GET_SWZ(swz,1) <= SWIZZLE_W &&
152           GET_SWZ(swz,2) <= SWIZZLE_W &&
153           GET_SWZ(swz,3) <= SWIZZLE_W;
154 }
155 
156 
157 /**
158  * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
159  * \return number of instructions removed
160  */
161 static GLuint
remove_instructions(struct gl_program * prog,const GLboolean * removeFlags,void * mem_ctx)162 remove_instructions(struct gl_program *prog, const GLboolean *removeFlags,
163                     void *mem_ctx)
164 {
165    GLint i, removeEnd = 0, removeCount = 0;
166    GLuint totalRemoved = 0;
167 
168    /* go backward */
169    for (i = prog->arb.NumInstructions - 1; i >= 0; i--) {
170       if (removeFlags[i]) {
171          totalRemoved++;
172          if (removeCount == 0) {
173             /* begin a run of instructions to remove */
174             removeEnd = i;
175             removeCount = 1;
176          }
177          else {
178             /* extend the run of instructions to remove */
179             removeCount++;
180          }
181       }
182       else {
183          /* don't remove this instruction, but check if the preceeding
184           * instructions are to be removed.
185           */
186          if (removeCount > 0) {
187             GLint removeStart = removeEnd - removeCount + 1;
188             _mesa_delete_instructions(prog, removeStart, removeCount, mem_ctx);
189             removeStart = removeCount = 0; /* reset removal info */
190          }
191       }
192    }
193    /* Finish removing if the first instruction was to be removed. */
194    if (removeCount > 0) {
195       GLint removeStart = removeEnd - removeCount + 1;
196       _mesa_delete_instructions(prog, removeStart, removeCount, mem_ctx);
197    }
198    return totalRemoved;
199 }
200 
201 
202 /**
203  * Remap register indexes according to map.
204  * \param prog  the program to search/replace
205  * \param file  the type of register file to search/replace
206  * \param map  maps old register indexes to new indexes
207  */
208 static void
replace_regs(struct gl_program * prog,gl_register_file file,const GLint map[])209 replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
210 {
211    GLuint i;
212 
213    for (i = 0; i < prog->arb.NumInstructions; i++) {
214       struct prog_instruction *inst = prog->arb.Instructions + i;
215       const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
216       GLuint j;
217       for (j = 0; j < numSrc; j++) {
218          if (inst->SrcReg[j].File == file) {
219             GLuint index = inst->SrcReg[j].Index;
220             assert(map[index] >= 0);
221             inst->SrcReg[j].Index = map[index];
222          }
223       }
224       if (inst->DstReg.File == file) {
225          const GLuint index = inst->DstReg.Index;
226          assert(map[index] >= 0);
227          inst->DstReg.Index = map[index];
228       }
229    }
230 }
231 
232 
233 /**
234  * Remove dead instructions from the given program.
235  * This is very primitive for now.  Basically look for temp registers
236  * that are written to but never read.  Remove any instructions that
237  * write to such registers.  Be careful with condition code setters.
238  */
239 static GLboolean
_mesa_remove_dead_code_global(struct gl_program * prog,void * mem_ctx)240 _mesa_remove_dead_code_global(struct gl_program *prog, void *mem_ctx)
241 {
242    GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
243    GLboolean *removeInst; /* per-instruction removal flag */
244    GLuint i, rem = 0, comp;
245 
246    memset(tempRead, 0, sizeof(tempRead));
247 
248    if (dbg) {
249       printf("Optimize: Begin dead code removal\n");
250       /*_mesa_print_program(prog);*/
251    }
252 
253    removeInst =
254       calloc(prog->arb.NumInstructions, sizeof(GLboolean));
255 
256    /* Determine which temps are read and written */
257    for (i = 0; i < prog->arb.NumInstructions; i++) {
258       const struct prog_instruction *inst = prog->arb.Instructions + i;
259       const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
260       GLuint j;
261 
262       /* check src regs */
263       for (j = 0; j < numSrc; j++) {
264          if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
265             const GLuint index = inst->SrcReg[j].Index;
266             GLuint read_mask;
267             assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
268 	    read_mask = get_src_arg_mask(inst, j, NO_MASK);
269 
270             if (inst->SrcReg[j].RelAddr) {
271                if (dbg)
272                   printf("abort remove dead code (indirect temp)\n");
273                goto done;
274             }
275 
276 	    for (comp = 0; comp < 4; comp++) {
277 	       const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
278                if (swz <= SWIZZLE_W) {
279                   if ((read_mask & (1 << swz)) == 0)
280                      continue;
281                   tempRead[index][swz] = GL_TRUE;
282                }
283 	    }
284          }
285       }
286 
287       /* check dst reg */
288       if (inst->DstReg.File == PROGRAM_TEMPORARY) {
289          assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
290 
291          if (inst->DstReg.RelAddr) {
292             if (dbg)
293                printf("abort remove dead code (indirect temp)\n");
294             goto done;
295          }
296       }
297    }
298 
299    /* find instructions that write to dead registers, flag for removal */
300    for (i = 0; i < prog->arb.NumInstructions; i++) {
301       struct prog_instruction *inst = prog->arb.Instructions + i;
302       const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
303 
304       if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
305          GLint chan, index = inst->DstReg.Index;
306 
307 	 for (chan = 0; chan < 4; chan++) {
308 	    if (!tempRead[index][chan] &&
309 		inst->DstReg.WriteMask & (1 << chan)) {
310 	       if (dbg) {
311 		  printf("Remove writemask on %u.%c\n", i,
312 			       chan == 3 ? 'w' : 'x' + chan);
313 	       }
314 	       inst->DstReg.WriteMask &= ~(1 << chan);
315 	       rem++;
316 	    }
317 	 }
318 
319 	 if (inst->DstReg.WriteMask == 0) {
320 	    /* If we cleared all writes, the instruction can be removed. */
321 	    if (dbg)
322 	       printf("Remove instruction %u: \n", i);
323 	    removeInst[i] = GL_TRUE;
324 	 }
325       }
326    }
327 
328    /* now remove the instructions which aren't needed */
329    rem = remove_instructions(prog, removeInst, mem_ctx);
330 
331    if (dbg) {
332       printf("Optimize: End dead code removal.\n");
333       printf("  %u channel writes removed\n", rem);
334       printf("  %u instructions removed\n", rem);
335       /*_mesa_print_program(prog);*/
336    }
337 
338 done:
339    free(removeInst);
340    return rem != 0;
341 }
342 
343 
344 enum inst_use
345 {
346    READ,
347    WRITE,
348    FLOW,
349    END
350 };
351 
352 
353 /**
354  * Scan forward in program from 'start' for the next occurances of TEMP[index].
355  * We look if an instruction reads the component given by the masks and if they
356  * are overwritten.
357  * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
358  * that we can't look further.
359  */
360 static enum inst_use
find_next_use(const struct gl_program * prog,GLuint start,GLuint index,GLuint mask)361 find_next_use(const struct gl_program *prog,
362               GLuint start,
363               GLuint index,
364               GLuint mask)
365 {
366    GLuint i;
367 
368    for (i = start; i < prog->arb.NumInstructions; i++) {
369       const struct prog_instruction *inst = prog->arb.Instructions + i;
370       switch (inst->Opcode) {
371       case OPCODE_BGNLOOP:
372       case OPCODE_BGNSUB:
373       case OPCODE_CAL:
374       case OPCODE_CONT:
375       case OPCODE_IF:
376       case OPCODE_ELSE:
377       case OPCODE_ENDIF:
378       case OPCODE_ENDLOOP:
379       case OPCODE_ENDSUB:
380       case OPCODE_RET:
381          return FLOW;
382       case OPCODE_END:
383          return END;
384       default:
385          {
386             const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
387             GLuint j;
388             for (j = 0; j < numSrc; j++) {
389                if (inst->SrcReg[j].RelAddr ||
390                    (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
391                    inst->SrcReg[j].Index == (GLint)index &&
392                    (get_src_arg_mask(inst,j,NO_MASK) & mask)))
393                   return READ;
394             }
395             if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
396                 inst->DstReg.File == PROGRAM_TEMPORARY &&
397                 inst->DstReg.Index == index) {
398                mask &= ~inst->DstReg.WriteMask;
399                if (mask == 0)
400                   return WRITE;
401             }
402          }
403       }
404    }
405    return END;
406 }
407 
408 
409 /**
410  * Is the given instruction opcode a flow-control opcode?
411  * XXX maybe move this into prog_instruction.[ch]
412  */
413 static GLboolean
_mesa_is_flow_control_opcode(enum prog_opcode opcode)414 _mesa_is_flow_control_opcode(enum prog_opcode opcode)
415 {
416    switch (opcode) {
417    case OPCODE_BGNLOOP:
418    case OPCODE_BGNSUB:
419    case OPCODE_CAL:
420    case OPCODE_CONT:
421    case OPCODE_IF:
422    case OPCODE_ELSE:
423    case OPCODE_END:
424    case OPCODE_ENDIF:
425    case OPCODE_ENDLOOP:
426    case OPCODE_ENDSUB:
427    case OPCODE_RET:
428       return GL_TRUE;
429    default:
430       return GL_FALSE;
431    }
432 }
433 
434 
435 /**
436  * Test if the given instruction is a simple MOV (no conditional updating,
437  * not relative addressing, no negation/abs, etc).
438  */
439 static GLboolean
can_downward_mov_be_modifed(const struct prog_instruction * mov)440 can_downward_mov_be_modifed(const struct prog_instruction *mov)
441 {
442    return
443       mov->Opcode == OPCODE_MOV &&
444       mov->SrcReg[0].RelAddr == 0 &&
445       mov->SrcReg[0].Negate == 0 &&
446       mov->DstReg.RelAddr == 0;
447 }
448 
449 
450 static GLboolean
can_upward_mov_be_modifed(const struct prog_instruction * mov)451 can_upward_mov_be_modifed(const struct prog_instruction *mov)
452 {
453    return
454       can_downward_mov_be_modifed(mov) &&
455       mov->DstReg.File == PROGRAM_TEMPORARY &&
456       !mov->Saturate;
457 }
458 
459 
460 /**
461  * Try to remove use of extraneous MOV instructions, to free them up for dead
462  * code removal.
463  */
464 static void
_mesa_remove_extra_move_use(struct gl_program * prog)465 _mesa_remove_extra_move_use(struct gl_program *prog)
466 {
467    GLuint i, j;
468 
469    if (dbg) {
470       printf("Optimize: Begin remove extra move use\n");
471       _mesa_print_program(prog);
472    }
473 
474    /*
475     * Look for sequences such as this:
476     *    MOV tmpX, arg0;
477     *    ...
478     *    FOO tmpY, tmpX, arg1;
479     * and convert into:
480     *    MOV tmpX, arg0;
481     *    ...
482     *    FOO tmpY, arg0, arg1;
483     */
484 
485    for (i = 0; i + 1 < prog->arb.NumInstructions; i++) {
486       const struct prog_instruction *mov = prog->arb.Instructions + i;
487       GLuint dst_mask, src_mask;
488       if (can_upward_mov_be_modifed(mov) == GL_FALSE)
489          continue;
490 
491       /* Scanning the code, we maintain the components which are still active in
492        * these two masks
493        */
494       dst_mask = mov->DstReg.WriteMask;
495       src_mask = get_src_arg_mask(mov, 0, NO_MASK);
496 
497       /* Walk through remaining instructions until the or src reg gets
498        * rewritten or we get into some flow-control, eliminating the use of
499        * this MOV.
500        */
501       for (j = i + 1; j < prog->arb.NumInstructions; j++) {
502          struct prog_instruction *inst2 = prog->arb.Instructions + j;
503          GLuint arg;
504 
505 	 if (_mesa_is_flow_control_opcode(inst2->Opcode))
506 	     break;
507 
508 	 /* First rewrite this instruction's args if appropriate. */
509 	 for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
510 	    GLuint comp, read_mask;
511 
512 	    if (inst2->SrcReg[arg].File != mov->DstReg.File ||
513 		inst2->SrcReg[arg].Index != mov->DstReg.Index ||
514 		inst2->SrcReg[arg].RelAddr)
515 	       continue;
516             read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
517 
518 	    /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
519              * components read still come from the mov instructions
520              */
521             if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
522                (read_mask & dst_mask) == read_mask) {
523                for (comp = 0; comp < 4; comp++) {
524                   const GLuint inst2_swz =
525                      GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
526                   const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
527                   inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
528                   inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
529                   inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
530                                                   inst2_swz) & 0x1) << comp);
531                }
532                inst2->SrcReg[arg].File = mov->SrcReg[0].File;
533                inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
534             }
535 	 }
536 
537 	 /* The source of MOV is written. This potentially deactivates some
538           * components from the src and dst of the MOV instruction
539           */
540 	 if (inst2->DstReg.File == mov->DstReg.File &&
541 	     (inst2->DstReg.RelAddr ||
542 	      inst2->DstReg.Index == mov->DstReg.Index)) {
543             dst_mask &= ~inst2->DstReg.WriteMask;
544             src_mask = get_src_arg_mask(mov, 0, dst_mask);
545          }
546 
547          /* Idem when the destination of mov is written */
548 	 if (inst2->DstReg.File == mov->SrcReg[0].File &&
549 	     (inst2->DstReg.RelAddr ||
550 	      inst2->DstReg.Index == mov->SrcReg[0].Index)) {
551             src_mask &= ~inst2->DstReg.WriteMask;
552             dst_mask &= get_dst_mask_for_mov(mov, src_mask);
553          }
554          if (dst_mask == 0)
555             break;
556       }
557    }
558 
559    if (dbg) {
560       printf("Optimize: End remove extra move use.\n");
561       /*_mesa_print_program(prog);*/
562    }
563 }
564 
565 
566 /**
567  * Complements dead_code_global. Try to remove code in block of code by
568  * carefully monitoring the swizzles. Both functions should be merged into one
569  * with a proper control flow graph
570  */
571 static GLboolean
_mesa_remove_dead_code_local(struct gl_program * prog,void * mem_ctx)572 _mesa_remove_dead_code_local(struct gl_program *prog, void *mem_ctx)
573 {
574    GLboolean *removeInst;
575    GLuint i, arg, rem = 0;
576 
577    removeInst =
578       calloc(prog->arb.NumInstructions, sizeof(GLboolean));
579 
580    for (i = 0; i < prog->arb.NumInstructions; i++) {
581       const struct prog_instruction *inst = prog->arb.Instructions + i;
582       const GLuint index = inst->DstReg.Index;
583       const GLuint mask = inst->DstReg.WriteMask;
584       enum inst_use use;
585 
586       /* We must deactivate the pass as soon as some indirection is used */
587       if (inst->DstReg.RelAddr)
588          goto done;
589       for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
590          if (inst->SrcReg[arg].RelAddr)
591             goto done;
592 
593       if (_mesa_is_flow_control_opcode(inst->Opcode) ||
594           _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
595           inst->DstReg.File != PROGRAM_TEMPORARY ||
596           inst->DstReg.RelAddr)
597          continue;
598 
599       use = find_next_use(prog, i+1, index, mask);
600       if (use == WRITE || use == END)
601          removeInst[i] = GL_TRUE;
602    }
603 
604    rem = remove_instructions(prog, removeInst, mem_ctx);
605 
606 done:
607    free(removeInst);
608    return rem != 0;
609 }
610 
611 
612 /**
613  * Try to inject the destination of mov as the destination of inst and recompute
614  * the swizzles operators for the sources of inst if required. Return GL_TRUE
615  * of the substitution was possible, GL_FALSE otherwise
616  */
617 static GLboolean
_mesa_merge_mov_into_inst(struct prog_instruction * inst,const struct prog_instruction * mov)618 _mesa_merge_mov_into_inst(struct prog_instruction *inst,
619                           const struct prog_instruction *mov)
620 {
621    /* Indirection table which associates destination and source components for
622     * the mov instruction
623     */
624    const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
625 
626    /* Some components are not written by inst. We cannot remove the mov */
627    if (mask != (inst->DstReg.WriteMask & mask))
628       return GL_FALSE;
629 
630    inst->Saturate |= mov->Saturate;
631 
632    /* Depending on the instruction, we may need to recompute the swizzles.
633     * Also, some other instructions (like TEX) are not linear. We will only
634     * consider completely active sources and destinations
635     */
636    switch (inst->Opcode) {
637 
638    /* Carstesian instructions: we compute the swizzle */
639    case OPCODE_MOV:
640    case OPCODE_MIN:
641    case OPCODE_MAX:
642    case OPCODE_ABS:
643    case OPCODE_ADD:
644    case OPCODE_MAD:
645    case OPCODE_MUL:
646    case OPCODE_SUB:
647    {
648       GLuint dst_to_src_comp[4] = {0,0,0,0};
649       GLuint dst_comp, arg;
650       for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
651          if (mov->DstReg.WriteMask & (1 << dst_comp)) {
652             const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
653             assert(src_comp < 4);
654             dst_to_src_comp[dst_comp] = src_comp;
655          }
656       }
657 
658       /* Patch each source of the instruction */
659       for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
660          const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
661          inst->SrcReg[arg].Swizzle = 0;
662 
663          /* Reset each active component of the swizzle */
664          for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
665             GLuint src_comp, arg_comp;
666             if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
667                continue;
668             src_comp = dst_to_src_comp[dst_comp];
669             assert(src_comp < 4);
670             arg_comp = GET_SWZ(arg_swz, src_comp);
671             assert(arg_comp < 4);
672             inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
673          }
674       }
675       inst->DstReg = mov->DstReg;
676       return GL_TRUE;
677    }
678 
679    /* Dot products and scalar instructions: we only change the destination */
680    case OPCODE_RCP:
681    case OPCODE_SIN:
682    case OPCODE_COS:
683    case OPCODE_RSQ:
684    case OPCODE_POW:
685    case OPCODE_EX2:
686    case OPCODE_LOG:
687    case OPCODE_DP2:
688    case OPCODE_DP3:
689    case OPCODE_DP4:
690       inst->DstReg = mov->DstReg;
691       return GL_TRUE;
692 
693    /* All other instructions require fully active components with no swizzle */
694    default:
695       if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
696           inst->DstReg.WriteMask != WRITEMASK_XYZW)
697          return GL_FALSE;
698       inst->DstReg = mov->DstReg;
699       return GL_TRUE;
700    }
701 }
702 
703 
704 /**
705  * Try to remove extraneous MOV instructions from the given program.
706  */
707 static GLboolean
_mesa_remove_extra_moves(struct gl_program * prog,void * mem_ctx)708 _mesa_remove_extra_moves(struct gl_program *prog, void *mem_ctx)
709 {
710    GLboolean *removeInst; /* per-instruction removal flag */
711    GLuint i, rem = 0, nesting = 0;
712 
713    if (dbg) {
714       printf("Optimize: Begin remove extra moves\n");
715       _mesa_print_program(prog);
716    }
717 
718    removeInst =
719       calloc(prog->arb.NumInstructions, sizeof(GLboolean));
720 
721    /*
722     * Look for sequences such as this:
723     *    FOO tmpX, arg0, arg1;
724     *    MOV tmpY, tmpX;
725     * and convert into:
726     *    FOO tmpY, arg0, arg1;
727     */
728 
729    for (i = 0; i < prog->arb.NumInstructions; i++) {
730       const struct prog_instruction *mov = prog->arb.Instructions + i;
731 
732       switch (mov->Opcode) {
733       case OPCODE_BGNLOOP:
734       case OPCODE_BGNSUB:
735       case OPCODE_IF:
736          nesting++;
737          break;
738       case OPCODE_ENDLOOP:
739       case OPCODE_ENDSUB:
740       case OPCODE_ENDIF:
741          nesting--;
742          break;
743       case OPCODE_MOV:
744          if (i > 0 &&
745              can_downward_mov_be_modifed(mov) &&
746              mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
747              nesting == 0)
748          {
749 
750             /* see if this MOV can be removed */
751             const GLuint id = mov->SrcReg[0].Index;
752             struct prog_instruction *prevInst;
753             GLuint prevI;
754 
755             /* get pointer to previous instruction */
756             prevI = i - 1;
757             while (prevI > 0 && removeInst[prevI])
758                prevI--;
759             prevInst = prog->arb.Instructions + prevI;
760 
761             if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
762                 prevInst->DstReg.Index == id &&
763                 prevInst->DstReg.RelAddr == 0) {
764 
765                const GLuint dst_mask = prevInst->DstReg.WriteMask;
766                enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
767 
768                if (next_use == WRITE || next_use == END) {
769                   /* OK, we can safely remove this MOV instruction.
770                    * Transform:
771                    *   prevI: FOO tempIndex, x, y;
772                    *       i: MOV z, tempIndex;
773                    * Into:
774                    *   prevI: FOO z, x, y;
775                    */
776                   if (_mesa_merge_mov_into_inst(prevInst, mov)) {
777                      removeInst[i] = GL_TRUE;
778                      if (dbg) {
779                         printf("Remove MOV at %u\n", i);
780                         printf("new prev inst %u: ", prevI);
781                         _mesa_print_instruction(prevInst);
782                      }
783                   }
784                }
785             }
786          }
787          break;
788       default:
789          ; /* nothing */
790       }
791    }
792 
793    /* now remove the instructions which aren't needed */
794    rem = remove_instructions(prog, removeInst, mem_ctx);
795 
796    free(removeInst);
797 
798    if (dbg) {
799       printf("Optimize: End remove extra moves.  %u instructions removed\n", rem);
800       /*_mesa_print_program(prog);*/
801    }
802 
803    return rem != 0;
804 }
805 
806 
807 /** A live register interval */
808 struct interval
809 {
810    GLuint Reg;         /** The temporary register index */
811    GLuint Start, End;  /** Start/end instruction numbers */
812 };
813 
814 
815 /** A list of register intervals */
816 struct interval_list
817 {
818    GLuint Num;
819    struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
820 };
821 
822 
823 static void
append_interval(struct interval_list * list,const struct interval * inv)824 append_interval(struct interval_list *list, const struct interval *inv)
825 {
826    list->Intervals[list->Num++] = *inv;
827 }
828 
829 
830 /** Insert interval inv into list, sorted by interval end */
831 static void
insert_interval_by_end(struct interval_list * list,const struct interval * inv)832 insert_interval_by_end(struct interval_list *list, const struct interval *inv)
833 {
834    /* XXX we could do a binary search insertion here since list is sorted */
835    GLint i = list->Num - 1;
836    while (i >= 0 && list->Intervals[i].End > inv->End) {
837       list->Intervals[i + 1] = list->Intervals[i];
838       i--;
839    }
840    list->Intervals[i + 1] = *inv;
841    list->Num++;
842 
843 #ifdef DEBUG
844    {
845       GLuint i;
846       for (i = 0; i + 1 < list->Num; i++) {
847          assert(list->Intervals[i].End <= list->Intervals[i + 1].End);
848       }
849    }
850 #endif
851 }
852 
853 
854 /** Remove the given interval from the interval list */
855 static void
remove_interval(struct interval_list * list,const struct interval * inv)856 remove_interval(struct interval_list *list, const struct interval *inv)
857 {
858    /* XXX we could binary search since list is sorted */
859    GLuint k;
860    for (k = 0; k < list->Num; k++) {
861       if (list->Intervals[k].Reg == inv->Reg) {
862          /* found, remove it */
863          assert(list->Intervals[k].Start == inv->Start);
864          assert(list->Intervals[k].End == inv->End);
865          while (k < list->Num - 1) {
866             list->Intervals[k] = list->Intervals[k + 1];
867             k++;
868          }
869          list->Num--;
870          return;
871       }
872    }
873 }
874 
875 
876 /** called by qsort() */
877 static int
compare_start(const void * a,const void * b)878 compare_start(const void *a, const void *b)
879 {
880    const struct interval *ia = (const struct interval *) a;
881    const struct interval *ib = (const struct interval *) b;
882    if (ia->Start < ib->Start)
883       return -1;
884    else if (ia->Start > ib->Start)
885       return +1;
886    else
887       return 0;
888 }
889 
890 
891 /** sort the interval list according to interval starts */
892 static void
sort_interval_list_by_start(struct interval_list * list)893 sort_interval_list_by_start(struct interval_list *list)
894 {
895    qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
896 #ifdef DEBUG
897    {
898       GLuint i;
899       for (i = 0; i + 1 < list->Num; i++) {
900          assert(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
901       }
902    }
903 #endif
904 }
905 
906 struct loop_info
907 {
908    GLuint Start, End;  /**< Start, end instructions of loop */
909 };
910 
911 /**
912  * Update the intermediate interval info for register 'index' and
913  * instruction 'ic'.
914  */
915 static void
update_interval(GLint intBegin[],GLint intEnd[],struct loop_info * loopStack,GLuint loopStackDepth,GLuint index,GLuint ic)916 update_interval(GLint intBegin[], GLint intEnd[],
917 		struct loop_info *loopStack, GLuint loopStackDepth,
918 		GLuint index, GLuint ic)
919 {
920    unsigned i;
921    GLuint begin = ic;
922    GLuint end = ic;
923 
924    /* If the register is used in a loop, extend its lifetime through the end
925     * of the outermost loop that doesn't contain its definition.
926     */
927    for (i = 0; i < loopStackDepth; i++) {
928       if (intBegin[index] < loopStack[i].Start) {
929 	 end = loopStack[i].End;
930 	 break;
931       }
932    }
933 
934    /* Variables that are live at the end of a loop will also be live at the
935     * beginning, so an instruction inside of a loop should have its live
936     * interval begin at the start of the outermost loop.
937     */
938    if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) {
939       begin = loopStack[0].Start;
940    }
941 
942    assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
943    if (intBegin[index] == -1) {
944       assert(intEnd[index] == -1);
945       intBegin[index] = begin;
946       intEnd[index] = end;
947    }
948    else {
949       intEnd[index] = end;
950    }
951 }
952 
953 
954 /**
955  * Find first/last instruction that references each temporary register.
956  */
957 static bool
find_temp_intervals(const struct prog_instruction * instructions,GLuint numInstructions,GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])958 find_temp_intervals(const struct prog_instruction *instructions,
959                     GLuint numInstructions,
960                     GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
961                     GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
962 {
963    struct loop_info loopStack[MAX_LOOP_NESTING];
964    GLuint loopStackDepth = 0;
965    GLuint i;
966 
967    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
968       intBegin[i] = intEnd[i] = -1;
969    }
970 
971    /* Scan instructions looking for temporary registers */
972    for (i = 0; i < numInstructions; i++) {
973       const struct prog_instruction *inst = instructions + i;
974       if (inst->Opcode == OPCODE_BGNLOOP) {
975          loopStack[loopStackDepth].Start = i;
976          loopStack[loopStackDepth].End = inst->BranchTarget;
977          loopStackDepth++;
978       }
979       else if (inst->Opcode == OPCODE_ENDLOOP) {
980          loopStackDepth--;
981       }
982       else if (inst->Opcode == OPCODE_CAL) {
983          return GL_FALSE;
984       }
985       else {
986          const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
987          GLuint j;
988          for (j = 0; j < numSrc; j++) {
989             if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
990                const GLuint index = inst->SrcReg[j].Index;
991                if (inst->SrcReg[j].RelAddr)
992                   return GL_FALSE;
993                update_interval(intBegin, intEnd, loopStack, loopStackDepth,
994 			       index, i);
995             }
996          }
997          if (inst->DstReg.File == PROGRAM_TEMPORARY) {
998             const GLuint index = inst->DstReg.Index;
999             if (inst->DstReg.RelAddr)
1000                return GL_FALSE;
1001             update_interval(intBegin, intEnd, loopStack, loopStackDepth,
1002 			    index, i);
1003          }
1004       }
1005    }
1006 
1007    return GL_TRUE;
1008 }
1009 
1010 
1011 /**
1012  * Find the live intervals for each temporary register in the program.
1013  * For register R, the interval [A,B] indicates that R is referenced
1014  * from instruction A through instruction B.
1015  * Special consideration is needed for loops and subroutines.
1016  * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
1017  */
1018 static GLboolean
find_live_intervals(struct gl_program * prog,struct interval_list * liveIntervals)1019 find_live_intervals(struct gl_program *prog,
1020                     struct interval_list *liveIntervals)
1021 {
1022    GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1023    GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1024    GLuint i;
1025 
1026    /*
1027     * Note: we'll return GL_FALSE below if we find relative indexing
1028     * into the TEMP register file.  We can't handle that yet.
1029     * We also give up on subroutines for now.
1030     */
1031 
1032    if (dbg) {
1033       printf("Optimize: Begin find intervals\n");
1034    }
1035 
1036    /* build intermediate arrays */
1037    if (!find_temp_intervals(prog->arb.Instructions, prog->arb.NumInstructions,
1038                             intBegin, intEnd))
1039       return GL_FALSE;
1040 
1041    /* Build live intervals list from intermediate arrays */
1042    liveIntervals->Num = 0;
1043    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
1044       if (intBegin[i] >= 0) {
1045          struct interval inv;
1046          inv.Reg = i;
1047          inv.Start = intBegin[i];
1048          inv.End = intEnd[i];
1049          append_interval(liveIntervals, &inv);
1050       }
1051    }
1052 
1053    /* Sort the list according to interval starts */
1054    sort_interval_list_by_start(liveIntervals);
1055 
1056    if (dbg) {
1057       /* print interval info */
1058       for (i = 0; i < liveIntervals->Num; i++) {
1059          const struct interval *inv = liveIntervals->Intervals + i;
1060          printf("Reg[%d] live [%d, %d]:",
1061                       inv->Reg, inv->Start, inv->End);
1062          if (1) {
1063             GLuint j;
1064             for (j = 0; j < inv->Start; j++)
1065                printf(" ");
1066             for (j = inv->Start; j <= inv->End; j++)
1067                printf("x");
1068          }
1069          printf("\n");
1070       }
1071    }
1072 
1073    return GL_TRUE;
1074 }
1075 
1076 
1077 /** Scan the array of used register flags to find free entry */
1078 static GLint
alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])1079 alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
1080 {
1081    GLuint k;
1082    for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
1083       if (!usedRegs[k]) {
1084          usedRegs[k] = GL_TRUE;
1085          return k;
1086       }
1087    }
1088    return -1;
1089 }
1090 
1091 
1092 /**
1093  * This function implements "Linear Scan Register Allocation" to reduce
1094  * the number of temporary registers used by the program.
1095  *
1096  * We compute the "live interval" for all temporary registers then
1097  * examine the overlap of the intervals to allocate new registers.
1098  * Basically, if two intervals do not overlap, they can use the same register.
1099  */
1100 static void
_mesa_reallocate_registers(struct gl_program * prog)1101 _mesa_reallocate_registers(struct gl_program *prog)
1102 {
1103    struct interval_list liveIntervals;
1104    GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1105    GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1106    GLuint i;
1107    GLint maxTemp = -1;
1108 
1109    if (dbg) {
1110       printf("Optimize: Begin live-interval register reallocation\n");
1111       _mesa_print_program(prog);
1112    }
1113 
1114    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
1115       registerMap[i] = -1;
1116       usedRegs[i] = GL_FALSE;
1117    }
1118 
1119    if (!find_live_intervals(prog, &liveIntervals)) {
1120       if (dbg)
1121          printf("Aborting register reallocation\n");
1122       return;
1123    }
1124 
1125    {
1126       struct interval_list activeIntervals;
1127       activeIntervals.Num = 0;
1128 
1129       /* loop over live intervals, allocating a new register for each */
1130       for (i = 0; i < liveIntervals.Num; i++) {
1131          const struct interval *live = liveIntervals.Intervals + i;
1132 
1133          if (dbg)
1134             printf("Consider register %u\n", live->Reg);
1135 
1136          /* Expire old intervals.  Intervals which have ended with respect
1137           * to the live interval can have their remapped registers freed.
1138           */
1139          {
1140             GLint j;
1141             for (j = 0; j < (GLint) activeIntervals.Num; j++) {
1142                const struct interval *inv = activeIntervals.Intervals + j;
1143                if (inv->End >= live->Start) {
1144                   /* Stop now.  Since the activeInterval list is sorted
1145                    * we know we don't have to go further.
1146                    */
1147                   break;
1148                }
1149                else {
1150                   /* Interval 'inv' has expired */
1151                   const GLint regNew = registerMap[inv->Reg];
1152                   assert(regNew >= 0);
1153 
1154                   if (dbg)
1155                      printf("  expire interval for reg %u\n", inv->Reg);
1156 
1157                   /* remove interval j from active list */
1158                   remove_interval(&activeIntervals, inv);
1159                   j--;  /* counter-act j++ in for-loop above */
1160 
1161                   /* return register regNew to the free pool */
1162                   if (dbg)
1163                      printf("  free reg %d\n", regNew);
1164                   assert(usedRegs[regNew] == GL_TRUE);
1165                   usedRegs[regNew] = GL_FALSE;
1166                }
1167             }
1168          }
1169 
1170          /* find a free register for this live interval */
1171          {
1172             const GLint k = alloc_register(usedRegs);
1173             if (k < 0) {
1174                /* out of registers, give up */
1175                return;
1176             }
1177             registerMap[live->Reg] = k;
1178             maxTemp = MAX2(maxTemp, k);
1179             if (dbg)
1180                printf("  remap register %u -> %d\n", live->Reg, k);
1181          }
1182 
1183          /* Insert this live interval into the active list which is sorted
1184           * by increasing end points.
1185           */
1186          insert_interval_by_end(&activeIntervals, live);
1187       }
1188    }
1189 
1190    if (maxTemp + 1 < (GLint) liveIntervals.Num) {
1191       /* OK, we've reduced the number of registers needed.
1192        * Scan the program and replace all the old temporary register
1193        * indexes with the new indexes.
1194        */
1195       replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
1196 
1197       prog->arb.NumTemporaries = maxTemp + 1;
1198    }
1199 
1200    if (dbg) {
1201       printf("Optimize: End live-interval register reallocation\n");
1202       printf("Num temp regs before: %u  after: %u\n",
1203                    liveIntervals.Num, maxTemp + 1);
1204       _mesa_print_program(prog);
1205    }
1206 }
1207 
1208 
1209 #if 0
1210 static void
1211 print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) {
1212    fprintf(stderr, "%s (%u inst):\n", txt, program->arb.NumInstructions);
1213    _mesa_print_program(program);
1214    _mesa_print_program_parameters(ctx, program);
1215    fprintf(stderr, "\n\n");
1216 }
1217 #endif
1218 
1219 /**
1220  * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
1221  * instruction is the first instruction to write to register T0.  The are
1222  * several lowering passes done in GLSL IR (e.g. branches and
1223  * relative addressing) that create a large number of conditional assignments
1224  * that ir_to_mesa converts to CMP instructions like the one mentioned above.
1225  *
1226  * Here is why this conversion is safe:
1227  * CMP T0, T1 T2 T0 can be expanded to:
1228  * if (T1 < 0.0)
1229  * 	MOV T0, T2;
1230  * else
1231  * 	MOV T0, T0;
1232  *
1233  * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
1234  * as the original program.  If (T1 < 0.0) evaluates to false, executing
1235  * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
1236  * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
1237  * because any instruction that was going to read from T0 after this was going
1238  * to read a garbage value anyway.
1239  */
1240 static void
_mesa_simplify_cmp(struct gl_program * program)1241 _mesa_simplify_cmp(struct gl_program * program)
1242 {
1243    GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1244    GLuint outputWrites[MAX_PROGRAM_OUTPUTS];
1245    GLuint i;
1246 
1247    if (dbg) {
1248       printf("Optimize: Begin reads without writes\n");
1249       _mesa_print_program(program);
1250    }
1251 
1252    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
1253       tempWrites[i] = 0;
1254    }
1255 
1256    for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) {
1257       outputWrites[i] = 0;
1258    }
1259 
1260    for (i = 0; i < program->arb.NumInstructions; i++) {
1261       struct prog_instruction *inst = program->arb.Instructions + i;
1262       GLuint prevWriteMask;
1263 
1264       /* Give up if we encounter relative addressing or flow control. */
1265       if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) {
1266          return;
1267       }
1268 
1269       if (inst->DstReg.File == PROGRAM_OUTPUT) {
1270          assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS);
1271          prevWriteMask = outputWrites[inst->DstReg.Index];
1272          outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
1273       } else if (inst->DstReg.File == PROGRAM_TEMPORARY) {
1274          assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
1275          prevWriteMask = tempWrites[inst->DstReg.Index];
1276          tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
1277       } else {
1278          /* No other register type can be a destination register. */
1279          continue;
1280       }
1281 
1282       /* For a CMP to be considered a conditional write, the destination
1283        * register and source register two must be the same. */
1284       if (inst->Opcode == OPCODE_CMP
1285           && !(inst->DstReg.WriteMask & prevWriteMask)
1286           && inst->SrcReg[2].File == inst->DstReg.File
1287           && inst->SrcReg[2].Index == inst->DstReg.Index
1288           && inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) {
1289 
1290          inst->Opcode = OPCODE_MOV;
1291          inst->SrcReg[0] = inst->SrcReg[1];
1292 
1293 	 /* Unused operands are expected to have the file set to
1294 	  * PROGRAM_UNDEFINED.  This is how _mesa_init_instructions initializes
1295 	  * all of the sources.
1296 	  */
1297 	 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
1298 	 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
1299 	 inst->SrcReg[2].File = PROGRAM_UNDEFINED;
1300 	 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
1301       }
1302    }
1303    if (dbg) {
1304       printf("Optimize: End reads without writes\n");
1305       _mesa_print_program(program);
1306    }
1307 }
1308 
1309 /**
1310  * Apply optimizations to the given program to eliminate unnecessary
1311  * instructions, temp regs, etc.
1312  */
1313 void
_mesa_optimize_program(struct gl_program * program,void * mem_ctx)1314 _mesa_optimize_program(struct gl_program *program, void *mem_ctx)
1315 {
1316    GLboolean any_change;
1317 
1318    _mesa_simplify_cmp(program);
1319    /* Stop when no modifications were output */
1320    do {
1321       any_change = GL_FALSE;
1322       _mesa_remove_extra_move_use(program);
1323       if (_mesa_remove_dead_code_global(program, mem_ctx))
1324          any_change = GL_TRUE;
1325       if (_mesa_remove_extra_moves(program, mem_ctx))
1326          any_change = GL_TRUE;
1327       if (_mesa_remove_dead_code_local(program, mem_ctx))
1328          any_change = GL_TRUE;
1329 
1330       any_change = _mesa_constant_fold(program) || any_change;
1331       _mesa_reallocate_registers(program);
1332    } while (any_change);
1333 }
1334 
1335