1 /**************************************************************************
2  *
3  * Copyright 2009 Marek Olšák <maraeo@gmail.com>
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the
14  * next paragraph) shall be included in all copies or substantial portions
15  * of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  **************************************************************************/
26 
/* This file contains the vertex shader transformations for SW TCL needed
 * to overcome the limitations of the r300 rasterizer.
29  *
30  * Transformations:
31  * 1) If the secondary color output is present, the primary color must be
32  *    present too.
33  * 2) If any back-face color output is present, there must be all 4 color
34  *    outputs and missing ones must be inserted.
35  * 3) Insert a trailing texcoord output containing a copy of POS, for WPOS.
36  *
37  * I know this code is cumbersome, but I don't know of any nicer way
38  * of transforming TGSI shaders. ~ M.
39  */
40 
41 #include "r300_vs.h"
42 
43 #include <stdio.h>
44 
45 #include "tgsi/tgsi_transform.h"
46 #include "tgsi/tgsi_dump.h"
47 
48 #include "draw/draw_context.h"
49 
50 struct vs_transform_context {
51     struct tgsi_transform_context base;
52 
53     boolean color_used[2];
54     boolean bcolor_used[2];
55 
56     /* Index of the pos output, typically 0. */
57     unsigned pos_output;
58     /* Index of the pos temp where all writes of pos are redirected to. */
59     unsigned pos_temp;
60     /* The index of the last generic output, after which we insert a new
61      * output for WPOS. */
62     int last_generic;
63 
64     unsigned num_outputs;
65     /* Used to shift output decl. indices when inserting new ones. */
66     unsigned decl_shift;
67     /* Used to remap writes to output decls if their indices changed. */
68     unsigned out_remap[32];
69 
70     /* First instruction processed? */
71     boolean first_instruction;
72     /* End instruction processed? */
73     boolean end_instruction;
74 
75     boolean temp_used[1024];
76 };
77 
emit_temp(struct tgsi_transform_context * ctx,unsigned reg)78 static void emit_temp(struct tgsi_transform_context *ctx, unsigned reg)
79 {
80     struct tgsi_full_declaration decl;
81 
82     decl = tgsi_default_full_declaration();
83     decl.Declaration.File = TGSI_FILE_TEMPORARY;
84     decl.Range.First = decl.Range.Last = reg;
85     ctx->emit_declaration(ctx, &decl);
86 }
87 
emit_output(struct tgsi_transform_context * ctx,unsigned name,unsigned index,unsigned interp,unsigned reg)88 static void emit_output(struct tgsi_transform_context *ctx,
89                         unsigned name, unsigned index, unsigned interp,
90                         unsigned reg)
91 {
92     struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
93     struct tgsi_full_declaration decl;
94 
95     decl = tgsi_default_full_declaration();
96     decl.Declaration.File = TGSI_FILE_OUTPUT;
97     decl.Declaration.Interpolate = 1;
98     decl.Declaration.Semantic = TRUE;
99     decl.Semantic.Name = name;
100     decl.Semantic.Index = index;
101     decl.Range.First = decl.Range.Last = reg;
102     decl.Interp.Interpolate = interp;
103     ctx->emit_declaration(ctx, &decl);
104     ++vsctx->num_outputs;
105 }
106 
insert_output_before(struct tgsi_transform_context * ctx,struct tgsi_full_declaration * before,unsigned name,unsigned index,unsigned interp)107 static void insert_output_before(struct tgsi_transform_context *ctx,
108                                  struct tgsi_full_declaration *before,
109                                  unsigned name, unsigned index, unsigned interp)
110 {
111     struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
112     unsigned i;
113 
114     /* Make a place for the new output. */
115     for (i = before->Range.First; i < ARRAY_SIZE(vsctx->out_remap); i++) {
116         ++vsctx->out_remap[i];
117     }
118 
119     /* Insert the new output. */
120     emit_output(ctx, name, index, interp,
121                 before->Range.First + vsctx->decl_shift);
122 
123     ++vsctx->decl_shift;
124 }
125 
insert_output_after(struct tgsi_transform_context * ctx,struct tgsi_full_declaration * after,unsigned name,unsigned index,unsigned interp)126 static void insert_output_after(struct tgsi_transform_context *ctx,
127                                 struct tgsi_full_declaration *after,
128                                 unsigned name, unsigned index, unsigned interp)
129 {
130     struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
131     unsigned i;
132 
133     /* Make a place for the new output. */
134     for (i = after->Range.First+1; i < ARRAY_SIZE(vsctx->out_remap); i++) {
135         ++vsctx->out_remap[i];
136     }
137 
138     /* Insert the new output. */
139     emit_output(ctx, name, index, interp,
140                 after->Range.First + 1);
141 
142     ++vsctx->decl_shift;
143 }
144 
transform_decl(struct tgsi_transform_context * ctx,struct tgsi_full_declaration * decl)145 static void transform_decl(struct tgsi_transform_context *ctx,
146                            struct tgsi_full_declaration *decl)
147 {
148     struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
149     unsigned i;
150 
151     if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
152         switch (decl->Semantic.Name) {
153             case TGSI_SEMANTIC_POSITION:
154                 vsctx->pos_output = decl->Range.First;
155                 break;
156 
157             case TGSI_SEMANTIC_COLOR:
158                 assert(decl->Semantic.Index < 2);
159 
160                 /* We must rasterize the first color if the second one is
161                  * used, otherwise the rasterizer doesn't do the color
162                  * selection correctly. Declare it, but don't write to it. */
163                 if (decl->Semantic.Index == 1 && !vsctx->color_used[0]) {
164                     insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
165                                          TGSI_INTERPOLATE_LINEAR);
166                     vsctx->color_used[0] = TRUE;
167                 }
168                 break;
169 
170             case TGSI_SEMANTIC_BCOLOR:
171                 assert(decl->Semantic.Index < 2);
172 
173                 /* We must rasterize all 4 colors if back-face colors are
174                  * used, otherwise the rasterizer doesn't do the color
175                  * selection correctly. Declare it, but don't write to it. */
176                 if (!vsctx->color_used[0]) {
177                     insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
178                                          TGSI_INTERPOLATE_LINEAR);
179                     vsctx->color_used[0] = TRUE;
180                 }
181                 if (!vsctx->color_used[1]) {
182                     insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 1,
183                                          TGSI_INTERPOLATE_LINEAR);
184                     vsctx->color_used[1] = TRUE;
185                 }
186                 if (decl->Semantic.Index == 1 && !vsctx->bcolor_used[0]) {
187                     insert_output_before(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0,
188                                          TGSI_INTERPOLATE_LINEAR);
189                     vsctx->bcolor_used[0] = TRUE;
190                 }
191                 break;
192 
193             case TGSI_SEMANTIC_GENERIC:
194                 vsctx->last_generic = MAX2(vsctx->last_generic, decl->Semantic.Index);
195                 break;
196         }
197 
198         /* Since we're inserting new outputs in between, the following outputs
199          * should be moved to the right so that they don't overlap with
200          * the newly added ones. */
201         decl->Range.First += vsctx->decl_shift;
202         decl->Range.Last += vsctx->decl_shift;
203 
204         ++vsctx->num_outputs;
205     } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
206         for (i = decl->Range.First; i <= decl->Range.Last; i++) {
207            vsctx->temp_used[i] = TRUE;
208         }
209     }
210 
211     ctx->emit_declaration(ctx, decl);
212 
213     /* Insert BCOLOR1 if needed. */
214     if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
215         decl->Semantic.Name == TGSI_SEMANTIC_BCOLOR &&
216         !vsctx->bcolor_used[1]) {
217         insert_output_after(ctx, decl, TGSI_SEMANTIC_BCOLOR, 1,
218                             TGSI_INTERPOLATE_LINEAR);
219     }
220 }
221 
transform_inst(struct tgsi_transform_context * ctx,struct tgsi_full_instruction * inst)222 static void transform_inst(struct tgsi_transform_context *ctx,
223                            struct tgsi_full_instruction *inst)
224 {
225     struct vs_transform_context *vsctx = (struct vs_transform_context *) ctx;
226     struct tgsi_full_instruction new_inst;
227     unsigned i;
228 
229     if (!vsctx->first_instruction) {
230         vsctx->first_instruction = TRUE;
231 
232         /* Insert the generic output for WPOS. */
233         emit_output(ctx, TGSI_SEMANTIC_GENERIC, vsctx->last_generic + 1,
234                     TGSI_INTERPOLATE_PERSPECTIVE, vsctx->num_outputs);
235 
236         /* Find a free temp for POSITION. */
237         for (i = 0; i < ARRAY_SIZE(vsctx->temp_used); i++) {
238             if (!vsctx->temp_used[i]) {
239                 emit_temp(ctx, i);
240                 vsctx->pos_temp = i;
241                 break;
242             }
243         }
244     }
245 
246     if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
247         /* MOV OUT[pos_output], TEMP[pos_temp]; */
248         new_inst = tgsi_default_full_instruction();
249         new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
250         new_inst.Instruction.NumDstRegs = 1;
251         new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
252         new_inst.Dst[0].Register.Index = vsctx->pos_output;
253         new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
254         new_inst.Instruction.NumSrcRegs = 1;
255         new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
256         new_inst.Src[0].Register.Index = vsctx->pos_temp;
257         ctx->emit_instruction(ctx, &new_inst);
258 
259         /* MOV OUT[n-1], TEMP[pos_temp]; */
260         new_inst = tgsi_default_full_instruction();
261         new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
262         new_inst.Instruction.NumDstRegs = 1;
263         new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
264         new_inst.Dst[0].Register.Index = vsctx->num_outputs - 1;
265         new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
266         new_inst.Instruction.NumSrcRegs = 1;
267         new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
268         new_inst.Src[0].Register.Index = vsctx->pos_temp;
269         ctx->emit_instruction(ctx, &new_inst);
270 
271         vsctx->end_instruction = TRUE;
272     } else {
273         /* Not an END instruction. */
274         /* Fix writes to outputs. */
275         for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
276             struct tgsi_full_dst_register *dst = &inst->Dst[i];
277             if (dst->Register.File == TGSI_FILE_OUTPUT) {
278                 if (dst->Register.Index == vsctx->pos_output) {
279                     /* Replace writes to OUT[pos_output] with TEMP[pos_temp]. */
280                     dst->Register.File = TGSI_FILE_TEMPORARY;
281                     dst->Register.Index = vsctx->pos_temp;
282                 } else {
283                     /* Not a position, good...
284                      * Since we were changing the indices of output decls,
285                      * we must redirect writes into them too. */
286                     dst->Register.Index = vsctx->out_remap[dst->Register.Index];
287                 }
288             }
289         }
290 
291         /* Inserting 2 instructions before the END opcode moves all following
292          * labels by 2. Subroutines are always after the END opcode so
293          * they're always moved. */
294         if (inst->Instruction.Opcode == TGSI_OPCODE_CAL) {
295             inst->Label.Label += 2;
296         }
297         /* The labels of the following opcodes are moved only after
298          * the END opcode. */
299         if (vsctx->end_instruction &&
300             (inst->Instruction.Opcode == TGSI_OPCODE_IF ||
301              inst->Instruction.Opcode == TGSI_OPCODE_ELSE ||
302              inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP ||
303              inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP)) {
304             inst->Label.Label += 2;
305         }
306     }
307 
308     ctx->emit_instruction(ctx, inst);
309 }
310 
r300_draw_init_vertex_shader(struct r300_context * r300,struct r300_vertex_shader * vs)311 void r300_draw_init_vertex_shader(struct r300_context *r300,
312                                   struct r300_vertex_shader *vs)
313 {
314     struct draw_context *draw = r300->draw;
315     struct tgsi_shader_info info;
316     struct vs_transform_context transform;
317     const uint newLen = tgsi_num_tokens(vs->state.tokens) + 100 /* XXX */;
318     struct pipe_shader_state new_vs = {
319         .type = PIPE_SHADER_IR_TGSI,
320         .tokens = tgsi_alloc_tokens(newLen)
321     };
322     unsigned i;
323 
324     tgsi_scan_shader(vs->state.tokens, &info);
325 
326     if (new_vs.tokens == NULL)
327         return;
328 
329     memset(&transform, 0, sizeof(transform));
330     for (i = 0; i < ARRAY_SIZE(transform.out_remap); i++) {
331         transform.out_remap[i] = i;
332     }
333     transform.last_generic = -1;
334     transform.base.transform_instruction = transform_inst;
335     transform.base.transform_declaration = transform_decl;
336 
337     for (i = 0; i < info.num_outputs; i++) {
338         unsigned index = info.output_semantic_index[i];
339 
340         switch (info.output_semantic_name[i]) {
341             case TGSI_SEMANTIC_COLOR:
342                 assert(index < 2);
343                 transform.color_used[index] = TRUE;
344                 break;
345 
346             case TGSI_SEMANTIC_BCOLOR:
347                 assert(index < 2);
348                 transform.bcolor_used[index] = TRUE;
349                 break;
350         }
351     }
352 
353     tgsi_transform_shader(vs->state.tokens,
354                           (struct tgsi_token*)new_vs.tokens,
355                           newLen, &transform.base);
356 
357 #if 0
358     printf("----------------------------------------------\norig shader:\n");
359     tgsi_dump(vs->state.tokens, 0);
360     printf("----------------------------------------------\nnew shader:\n");
361     tgsi_dump(new_vs.tokens, 0);
362     printf("----------------------------------------------\n");
363 #endif
364 
365     /* Free old tokens. */
366     FREE((void*)vs->state.tokens);
367 
368     vs->draw_vs = draw_create_vertex_shader(draw, &new_vs);
369 
370     /* Instead of duplicating and freeing the tokens, copy the pointer directly. */
371     vs->state.tokens = new_vs.tokens;
372 
373     /* Init the VS output table for the rasterizer. */
374     r300_init_vs_outputs(r300, vs);
375 
376     /* Make the last generic be WPOS. */
377     vs->outputs.wpos = vs->outputs.generic[transform.last_generic + 1];
378     vs->outputs.generic[transform.last_generic + 1] = ATTR_UNUSED;
379 }
380