1 /*
2  * Copyright (C) 2004  David Airlie   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20  */
21 
22 #include "main/glheader.h"
23 #include "main/macros.h"
24 #include "main/atifragshader.h"
25 #include "main/samplerobj.h"
26 #include "swrast/s_atifragshader.h"
27 #include "swrast/s_context.h"
28 
/** Row indices into atifs_machine::Inputs. */
enum {
   ATI_FS_INPUT_PRIMARY = 0,
   ATI_FS_INPUT_SECONDARY = 1
};
31 
32 /**
33  * State for executing ATI fragment shader.
34  */
35 struct atifs_machine
36 {
37    GLfloat Registers[6][4];         /** six temporary registers */
38    GLfloat PrevPassRegisters[6][4];
39    GLfloat Inputs[2][4];   /** Primary, secondary input colors */
40 };
41 
42 
43 
44 /**
45  * Fetch a texel.
46  */
47 static void
fetch_texel(struct gl_context * ctx,const GLfloat texcoord[4],GLfloat lambda,GLuint unit,GLfloat color[4])48 fetch_texel(struct gl_context * ctx, const GLfloat texcoord[4], GLfloat lambda,
49 	    GLuint unit, GLfloat color[4])
50 {
51    SWcontext *swrast = SWRAST_CONTEXT(ctx);
52 
53    /* XXX use a float-valued TextureSample routine here!!! */
54    swrast->TextureSample[unit](ctx, _mesa_get_samplerobj(ctx, unit),
55                                ctx->Texture.Unit[unit]._Current,
56 			       1, (const GLfloat(*)[4]) texcoord,
57                                &lambda, (GLfloat (*)[4]) color);
58 }
59 
60 static void
apply_swizzle(GLfloat values[4],GLuint swizzle)61 apply_swizzle(GLfloat values[4], GLuint swizzle)
62 {
63    GLfloat s, t, r, q;
64 
65    s = values[0];
66    t = values[1];
67    r = values[2];
68    q = values[3];
69 
70    switch (swizzle) {
71    case GL_SWIZZLE_STR_ATI:
72       values[0] = s;
73       values[1] = t;
74       values[2] = r;
75       break;
76    case GL_SWIZZLE_STQ_ATI:
77       values[0] = s;
78       values[1] = t;
79       values[2] = q;
80       break;
81    case GL_SWIZZLE_STR_DR_ATI:
82       values[0] = s / r;
83       values[1] = t / r;
84       values[2] = 1 / r;
85       break;
86    case GL_SWIZZLE_STQ_DQ_ATI:
87 /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
88       if (q == 0.0F)
89          q = 0.000000001F;
90       values[0] = s / q;
91       values[1] = t / q;
92       values[2] = 1.0F / q;
93       break;
94    }
95    values[3] = 0.0;
96 }
97 
98 static void
apply_src_rep(GLint optype,GLuint rep,GLfloat * val)99 apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
100 {
101    GLint i;
102    GLint start, end;
103    if (!rep)
104       return;
105 
106    start = optype ? 3 : 0;
107    end = 4;
108 
109    for (i = start; i < end; i++) {
110       switch (rep) {
111       case GL_RED:
112 	 val[i] = val[0];
113 	 break;
114       case GL_GREEN:
115 	 val[i] = val[1];
116 	 break;
117       case GL_BLUE:
118 	 val[i] = val[2];
119 	 break;
120       case GL_ALPHA:
121 	 val[i] = val[3];
122 	 break;
123       }
124    }
125 }
126 
127 static void
apply_src_mod(GLint optype,GLuint mod,GLfloat * val)128 apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
129 {
130    GLint i;
131    GLint start, end;
132 
133    if (!mod)
134       return;
135 
136    start = optype ? 3 : 0;
137    end = 4;
138 
139    for (i = start; i < end; i++) {
140       if (mod & GL_COMP_BIT_ATI)
141 	 val[i] = 1 - val[i];
142 
143       if (mod & GL_BIAS_BIT_ATI)
144 	 val[i] = val[i] - 0.5F;
145 
146       if (mod & GL_2X_BIT_ATI)
147 	 val[i] = 2 * val[i];
148 
149       if (mod & GL_NEGATE_BIT_ATI)
150 	 val[i] = -val[i];
151    }
152 }
153 
154 static void
apply_dst_mod(GLuint optype,GLuint mod,GLfloat * val)155 apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
156 {
157    GLint i;
158    GLint has_sat = mod & GL_SATURATE_BIT_ATI;
159    GLint start, end;
160 
161    mod &= ~GL_SATURATE_BIT_ATI;
162 
163    start = optype ? 3 : 0;
164    end = optype ? 4 : 3;
165 
166    for (i = start; i < end; i++) {
167       switch (mod) {
168       case GL_2X_BIT_ATI:
169 	 val[i] = 2 * val[i];
170 	 break;
171       case GL_4X_BIT_ATI:
172 	 val[i] = 4 * val[i];
173 	 break;
174       case GL_8X_BIT_ATI:
175 	 val[i] = 8 * val[i];
176 	 break;
177       case GL_HALF_BIT_ATI:
178 	 val[i] = val[i] * 0.5F;
179 	 break;
180       case GL_QUARTER_BIT_ATI:
181 	 val[i] = val[i] * 0.25F;
182 	 break;
183       case GL_EIGHTH_BIT_ATI:
184 	 val[i] = val[i] * 0.125F;
185 	 break;
186       }
187 
188       if (has_sat) {
189 	 if (val[i] < 0.0F)
190 	    val[i] = 0.0F;
191 	 else if (val[i] > 1.0F)
192 	    val[i] = 1.0F;
193       }
194       else {
195 	 if (val[i] < -8.0F)
196 	    val[i] = -8.0F;
197 	 else if (val[i] > 8.0F)
198 	    val[i] = 8.0F;
199       }
200    }
201 }
202 
203 
204 static void
write_dst_addr(GLuint optype,GLuint mod,GLuint mask,GLfloat * src,GLfloat * dst)205 write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
206 	       GLfloat * dst)
207 {
208    GLint i;
209    apply_dst_mod(optype, mod, src);
210 
211    if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
212       if (mask) {
213 	 if (mask & GL_RED_BIT_ATI)
214 	    dst[0] = src[0];
215 
216 	 if (mask & GL_GREEN_BIT_ATI)
217 	    dst[1] = src[1];
218 
219 	 if (mask & GL_BLUE_BIT_ATI)
220 	    dst[2] = src[2];
221       }
222       else {
223 	 for (i = 0; i < 3; i++)
224 	    dst[i] = src[i];
225       }
226    }
227    else
228       dst[3] = src[3];
229 }
230 
231 static void
finish_pass(struct atifs_machine * machine)232 finish_pass(struct atifs_machine *machine)
233 {
234    GLint i;
235 
236    for (i = 0; i < 6; i++) {
237       COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
238    }
239 }
240 
241 
242 static void
handle_pass_op(struct atifs_machine * machine,struct atifs_setupinst * texinst,const SWspan * span,GLuint column,GLuint idx)243 handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
244 	       const SWspan *span, GLuint column, GLuint idx)
245 {
246    GLuint swizzle = texinst->swizzle;
247    GLuint pass_tex = texinst->src;
248 
249    if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
250       pass_tex -= GL_TEXTURE0_ARB;
251       COPY_4V(machine->Registers[idx],
252 	      span->array->attribs[VARYING_SLOT_TEX0 + pass_tex][column]);
253    }
254    else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
255       pass_tex -= GL_REG_0_ATI;
256       COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
257    }
258    apply_swizzle(machine->Registers[idx], swizzle);
259 
260 }
261 
262 static void
handle_sample_op(struct gl_context * ctx,struct atifs_machine * machine,struct atifs_setupinst * texinst,const SWspan * span,GLuint column,GLuint idx)263 handle_sample_op(struct gl_context * ctx, struct atifs_machine *machine,
264 		 struct atifs_setupinst *texinst, const SWspan *span,
265 		 GLuint column, GLuint idx)
266 {
267 /* sample from unit idx using texinst->src as coords */
268    GLuint swizzle = texinst->swizzle;
269    GLuint coord_source = texinst->src;
270    GLfloat tex_coords[4] = { 0 };
271 
272    if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
273       coord_source -= GL_TEXTURE0_ARB;
274       COPY_4V(tex_coords,
275               span->array->attribs[VARYING_SLOT_TEX0 + coord_source][column]);
276    }
277    else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
278       coord_source -= GL_REG_0_ATI;
279       COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
280    }
281    apply_swizzle(tex_coords, swizzle);
282    fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
283 }
284 
/*
 * Copy the 4-vector x into argument slot i of the color (optype 0) or
 * alpha (optype 1) operation.  Expects an array named "src" to be in scope
 * at the point of use.
 */
#define SETUP_SRC_REG(optype, i, x) \
   do { COPY_4V(src[optype][i], x); } while (0)
289 
290 
291 
292 /**
293  * Execute the given fragment shader.
294  * NOTE: we do everything in single-precision floating point
295  * \param ctx - rendering context
296  * \param shader - the shader to execute
297  * \param machine - virtual machine state
298  * \param span - the SWspan we're operating on
299  * \param column - which pixel [i] we're operating on in the span
300  */
301 static void
execute_shader(struct gl_context * ctx,const struct ati_fragment_shader * shader,struct atifs_machine * machine,const SWspan * span,GLuint column)302 execute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader,
303 	       struct atifs_machine *machine, const SWspan *span,
304                GLuint column)
305 {
306    GLuint pc;
307    struct atifs_instruction *inst;
308    struct atifs_setupinst *texinst;
309    GLint optype;
310    GLuint i;
311    GLint j, pass;
312    GLint dstreg;
313    GLfloat src[2][3][4];
314    GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
315    GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
316    GLfloat dst[2][4], *dstp;
317 
318    for (pass = 0; pass < shader->NumPasses; pass++) {
319       if (pass > 0)
320 	 finish_pass(machine);
321       for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
322 	 texinst = &shader->SetupInst[pass][j];
323 	 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
324 	    handle_pass_op(machine, texinst, span, column, j);
325 	 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
326 	    handle_sample_op(ctx, machine, texinst, span, column, j);
327       }
328 
329       for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
330 	 inst = &shader->Instructions[pass][pc];
331 
332 	 /* setup the source registers for color and alpha ops */
333 	 for (optype = 0; optype < 2; optype++) {
334  	    for (i = 0; i < inst->ArgCount[optype]; i++) {
335 	       GLint index = inst->SrcReg[optype][i].Index;
336 
337 	       if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
338 		  SETUP_SRC_REG(optype, i,
339 				machine->Registers[index - GL_REG_0_ATI]);
340 	       else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
341 		  if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
342 		     SETUP_SRC_REG(optype, i,
343 				shader->Constants[index - GL_CON_0_ATI]);
344 		  } else {
345 		     SETUP_SRC_REG(optype, i,
346 				ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
347 		  }
348 	       }
349 	       else if (index == GL_ONE)
350 		  SETUP_SRC_REG(optype, i, ones);
351 	       else if (index == GL_ZERO)
352 		  SETUP_SRC_REG(optype, i, zeros);
353 	       else if (index == GL_PRIMARY_COLOR_EXT)
354 		  SETUP_SRC_REG(optype, i,
355 				machine->Inputs[ATI_FS_INPUT_PRIMARY]);
356 	       else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
357 		  SETUP_SRC_REG(optype, i,
358 				machine->Inputs[ATI_FS_INPUT_SECONDARY]);
359 
360 	       apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
361 			     src[optype][i]);
362 	       apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
363 			     src[optype][i]);
364 	    }
365 	 }
366 
367 	 /* Execute the operations - color then alpha */
368 	 for (optype = 0; optype < 2; optype++) {
369 	    if (inst->Opcode[optype]) {
370 	       switch (inst->Opcode[optype]) {
371 	       case GL_ADD_ATI:
372 		  if (!optype)
373 		     for (i = 0; i < 3; i++) {
374 			dst[optype][i] =
375 			   src[optype][0][i] + src[optype][1][i];
376 		     }
377 		  else
378 		     dst[optype][3] = src[optype][0][3] + src[optype][1][3];
379 		  break;
380 	       case GL_SUB_ATI:
381 		  if (!optype)
382 		     for (i = 0; i < 3; i++) {
383 			dst[optype][i] =
384 			   src[optype][0][i] - src[optype][1][i];
385 		     }
386 		  else
387 		     dst[optype][3] = src[optype][0][3] - src[optype][1][3];
388 		  break;
389 	       case GL_MUL_ATI:
390 		  if (!optype)
391 		     for (i = 0; i < 3; i++) {
392 			dst[optype][i] =
393 			   src[optype][0][i] * src[optype][1][i];
394 		     }
395 		  else
396 		     dst[optype][3] = src[optype][0][3] * src[optype][1][3];
397 		  break;
398 	       case GL_MAD_ATI:
399 		  if (!optype)
400 		     for (i = 0; i < 3; i++) {
401 			dst[optype][i] =
402 			   src[optype][0][i] * src[optype][1][i] +
403 			   src[optype][2][i];
404 		     }
405 		  else
406 		     dst[optype][3] =
407 			src[optype][0][3] * src[optype][1][3] +
408 			src[optype][2][3];
409 		  break;
410 	       case GL_LERP_ATI:
411 		  if (!optype)
412 		     for (i = 0; i < 3; i++) {
413 			dst[optype][i] =
414 			   src[optype][0][i] * src[optype][1][i] + (1 -
415 								    src
416 								    [optype]
417 								    [0][i]) *
418 			   src[optype][2][i];
419 		     }
420 		  else
421 		     dst[optype][3] =
422 			src[optype][0][3] * src[optype][1][3] + (1 -
423 								 src[optype]
424 								 [0][3]) *
425 			src[optype][2][3];
426 		  break;
427 
428 	       case GL_MOV_ATI:
429 		  if (!optype)
430 		     for (i = 0; i < 3; i++) {
431 			dst[optype][i] = src[optype][0][i];
432 		     }
433 		  else
434 		     dst[optype][3] = src[optype][0][3];
435 		  break;
436 	       case GL_CND_ATI:
437 		  if (!optype) {
438 		     for (i = 0; i < 3; i++) {
439 			dst[optype][i] =
440 			   (src[optype][2][i] >
441 			    0.5F) ? src[optype][0][i] : src[optype][1][i];
442 		     }
443 		  }
444 		  else {
445 		     dst[optype][3] =
446 			(src[optype][2][3] >
447 			 0.5F) ? src[optype][0][3] : src[optype][1][3];
448 		  }
449 		  break;
450 
451 	       case GL_CND0_ATI:
452 		  if (!optype)
453 		     for (i = 0; i < 3; i++) {
454 			dst[optype][i] =
455 			   (src[optype][2][i] >=
456 			    0) ? src[optype][0][i] : src[optype][1][i];
457 		     }
458 		  else {
459 		     dst[optype][3] =
460 			(src[optype][2][3] >=
461 			 0) ? src[optype][0][3] : src[optype][1][3];
462 		  }
463 		  break;
464 	       case GL_DOT2_ADD_ATI:
465 		  {
466 		     GLfloat result;
467 
468 		     /* DOT 2 always uses the source from the color op */
469 		     /* could save recalculation of dot products for alpha inst */
470 		     result = src[0][0][0] * src[0][1][0] +
471 			src[0][0][1] * src[0][1][1] + src[0][2][2];
472 		     if (!optype) {
473 			for (i = 0; i < 3; i++) {
474 			   dst[optype][i] = result;
475 			}
476 		     }
477 		     else
478 			dst[optype][3] = result;
479 		  }
480 		  break;
481 	       case GL_DOT3_ATI:
482 		  {
483 		     GLfloat result;
484 
485 		     /* DOT 3 always uses the source from the color op */
486 		     result = src[0][0][0] * src[0][1][0] +
487 			src[0][0][1] * src[0][1][1] +
488 			src[0][0][2] * src[0][1][2];
489 
490 		     if (!optype) {
491 			for (i = 0; i < 3; i++) {
492 			   dst[optype][i] = result;
493 			}
494 		     }
495 		     else
496 			dst[optype][3] = result;
497 		  }
498 		  break;
499 	       case GL_DOT4_ATI:
500 		  {
501 		     GLfloat result;
502 
503 		     /* DOT 4 always uses the source from the color op */
504 		     result = src[0][0][0] * src[0][1][0] +
505 			src[0][0][1] * src[0][1][1] +
506 			src[0][0][2] * src[0][1][2] +
507 			src[0][0][3] * src[0][1][3];
508 		     if (!optype) {
509 			for (i = 0; i < 3; i++) {
510 			   dst[optype][i] = result;
511 			}
512 		     }
513 		     else
514 			dst[optype][3] = result;
515 		  }
516 		  break;
517 
518 	       }
519 	    }
520 	 }
521 
522 	 /* write out the destination registers */
523 	 for (optype = 0; optype < 2; optype++) {
524 	    if (inst->Opcode[optype]) {
525 	       dstreg = inst->DstReg[optype].Index;
526 	       dstp = machine->Registers[dstreg - GL_REG_0_ATI];
527 
528 	       if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
529 		  (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
530 	          write_dst_addr(optype, inst->DstReg[optype].dstMod,
531 			      inst->DstReg[optype].dstMask, dst[optype],
532 			      dstp);
533 	       else
534 		  write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
535 	    }
536 	 }
537       }
538    }
539 }
540 
541 
542 /**
543  * Init fragment shader virtual machine state.
544  */
545 static void
init_machine(struct gl_context * ctx,struct atifs_machine * machine,const struct ati_fragment_shader * shader,const SWspan * span,GLuint col)546 init_machine(struct gl_context * ctx, struct atifs_machine *machine,
547 	     const struct ati_fragment_shader *shader,
548 	     const SWspan *span, GLuint col)
549 {
550    GLfloat (*inputs)[4] = machine->Inputs;
551    GLint i, j;
552 
553    for (i = 0; i < 6; i++) {
554       for (j = 0; j < 4; j++)
555 	 machine->Registers[i][j] = 0.0;
556    }
557 
558    COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[VARYING_SLOT_COL0][col]);
559    COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[VARYING_SLOT_COL1][col]);
560 }
561 
562 
563 
564 /**
565  * Execute the current ATI shader program, operating on the given span.
566  */
567 void
_swrast_exec_fragment_shader(struct gl_context * ctx,SWspan * span)568 _swrast_exec_fragment_shader(struct gl_context * ctx, SWspan *span)
569 {
570    const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
571    struct atifs_machine machine;
572    GLuint i;
573 
574    /* incoming colors should be floats */
575    assert(span->array->ChanType == GL_FLOAT);
576 
577    for (i = 0; i < span->end; i++) {
578       if (span->array->mask[i]) {
579 	 init_machine(ctx, &machine, shader, span, i);
580 
581 	 execute_shader(ctx, shader, &machine, span, i);
582 
583          /* store result color */
584 	 {
585 	    const GLfloat *colOut = machine.Registers[0];
586             /*fprintf(stderr,"outputs %f %f %f %f\n",
587               colOut[0], colOut[1], colOut[2], colOut[3]); */
588             COPY_4V(span->array->attribs[VARYING_SLOT_COL0][i], colOut);
589 	 }
590       }
591    }
592 }
593