1 /*
2 ===========================================================================
3 Copyright (C) 1999-2005 Id Software, Inc.
4
5 This file is part of Quake III Arena source code.
6
7 Quake III Arena source code is free software; you can redistribute it
8 and/or modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the License,
10 or (at your option) any later version.
11
12 Quake III Arena source code is distributed in the hope that it will be
13 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Quake III Arena source code; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 ===========================================================================
21 */
22
23 /* This file is only compiled for PowerPC builds with Altivec support.
24 Altivec intrinsics need to be in a separate file, so GCC's -maltivec
25 command line can enable them, but give us the option to _not_ use that
26 on other files, where the compiler might then generate Altivec
27 instructions for normal floating point, crashing on G3 (etc) processors. */
28
29 #include "tr_local.h"
30
31 #if idppc_altivec
32
33 #if !defined(__APPLE__)
34 #include <altivec.h>
35 #endif
36
ProjectDlightTexture_altivec(void)37 void ProjectDlightTexture_altivec( void ) {
38 int i, l;
39 vec_t origin0, origin1, origin2;
40 float texCoords0, texCoords1;
41 vector float floatColorVec0, floatColorVec1;
42 vector float modulateVec, colorVec, zero;
43 vector short colorShort;
44 vector signed int colorInt;
45 vector unsigned char floatColorVecPerm, modulatePerm, colorChar;
46 vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff,
47 0x00, 0x00, 0x00, 0xff,
48 0x00, 0x00, 0x00, 0xff,
49 0x00, 0x00, 0x00, 0xff);
50 float *texCoords;
51 byte *colors;
52 byte clipBits[SHADER_MAX_VERTEXES];
53 float texCoordsArray[SHADER_MAX_VERTEXES][2];
54 byte colorArray[SHADER_MAX_VERTEXES][4];
55 glIndex_t hitIndexes[SHADER_MAX_INDEXES];
56 int numIndexes;
57 float scale;
58 float radius;
59 vec3_t floatColor;
60 float modulate = 0.0f;
61
62 if ( !backEnd.refdef.num_dlights ) {
63 return;
64 }
65
66 if ( backEnd.refdef.rdflags & RDF_SNOOPERVIEW ) { // no dlights for snooper
67 return;
68 }
69
70 // There has to be a better way to do this so that floatColor
71 // and/or modulate are already 16-byte aligned.
72 floatColorVecPerm = vec_lvsl(0,(float *)floatColor);
73 modulatePerm = vec_lvsl(0,(float *)&modulate);
74 modulatePerm = (vector unsigned char)vec_splat((vector unsigned int)modulatePerm,0);
75 zero = (vector float)vec_splat_s8(0);
76
77 for ( l = 0 ; l < backEnd.refdef.num_dlights ; l++ ) {
78 dlight_t *dl;
79
80 if ( !( tess.dlightBits & ( 1 << l ) ) ) {
81 continue; // this surface definately doesn't have any of this light
82 }
83 texCoords = texCoordsArray[0];
84 colors = colorArray[0];
85
86 dl = &backEnd.refdef.dlights[l];
87 origin0 = dl->transformed[0];
88 origin1 = dl->transformed[1];
89 origin2 = dl->transformed[2];
90 radius = dl->radius;
91 scale = 1.0f / radius;
92
93 if(r_greyscale->integer)
94 {
95 float luminance;
96
97 luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f;
98 floatColor[0] = floatColor[1] = floatColor[2] = luminance;
99 }
100 else if(r_greyscale->value)
101 {
102 float luminance;
103
104 luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f;
105 floatColor[0] = LERP(dl->color[0] * 255.0f, luminance, r_greyscale->value);
106 floatColor[1] = LERP(dl->color[1] * 255.0f, luminance, r_greyscale->value);
107 floatColor[2] = LERP(dl->color[2] * 255.0f, luminance, r_greyscale->value);
108 }
109 else
110 {
111 floatColor[0] = dl->color[0] * 255.0f;
112 floatColor[1] = dl->color[1] * 255.0f;
113 floatColor[2] = dl->color[2] * 255.0f;
114 }
115 floatColorVec0 = vec_ld(0, floatColor);
116 floatColorVec1 = vec_ld(11, floatColor);
117 floatColorVec0 = vec_perm(floatColorVec0,floatColorVec0,floatColorVecPerm);
118 for ( i = 0 ; i < tess.numVertexes ; i++, texCoords += 2, colors += 4 ) {
119 int clip = 0;
120 vec_t dist0, dist1, dist2;
121
122 dist0 = origin0 - tess.xyz[i][0];
123 dist1 = origin1 - tess.xyz[i][1];
124 dist2 = origin2 - tess.xyz[i][2];
125
126 backEnd.pc.c_dlightVertexes++;
127
128 texCoords0 = 0.5f + dist0 * scale;
129 texCoords1 = 0.5f + dist1 * scale;
130
131 if( !r_dlightBacks->integer &&
132 // dist . tess.normal[i]
133 ( dist0 * tess.normal[i][0] +
134 dist1 * tess.normal[i][1] +
135 dist2 * tess.normal[i][2] ) < 0.0f ) {
136 clip = 63;
137 } else {
138 if ( texCoords0 < 0.0f ) {
139 clip |= 1;
140 } else if ( texCoords0 > 1.0f ) {
141 clip |= 2;
142 }
143 if ( texCoords1 < 0.0f ) {
144 clip |= 4;
145 } else if ( texCoords1 > 1.0f ) {
146 clip |= 8;
147 }
148 texCoords[0] = texCoords0;
149 texCoords[1] = texCoords1;
150
151 // modulate the strength based on the height and color
152 if ( dist2 > radius ) {
153 clip |= 16;
154 modulate = 0.0f;
155 } else if ( dist2 < -radius ) {
156 clip |= 32;
157 modulate = 0.0f;
158 } else {
159 dist2 = Q_fabs(dist2);
160 if ( dist2 < radius * 0.5f ) {
161 modulate = 1.0f;
162 } else {
163 modulate = 2.0f * (radius - dist2) * scale;
164 }
165 }
166 }
167 clipBits[i] = clip;
168
169 modulateVec = vec_ld(0,(float *)&modulate);
170 modulateVec = vec_perm(modulateVec,modulateVec,modulatePerm);
171 colorVec = vec_madd(floatColorVec0,modulateVec,zero);
172 colorInt = vec_cts(colorVec,0); // RGBx
173 colorShort = vec_pack(colorInt,colorInt); // RGBxRGBx
174 colorChar = vec_packsu(colorShort,colorShort); // RGBxRGBxRGBxRGBx
175 colorChar = vec_sel(colorChar,vSel,vSel); // RGBARGBARGBARGBA replace alpha with 255
176 vec_ste((vector unsigned int)colorChar,0,(unsigned int *)colors); // store color
177 }
178
179 // build a list of triangles that need light
180 numIndexes = 0;
181 for ( i = 0 ; i < tess.numIndexes ; i += 3 ) {
182 int a, b, c;
183
184 a = tess.indexes[i];
185 b = tess.indexes[i+1];
186 c = tess.indexes[i+2];
187 if ( clipBits[a] & clipBits[b] & clipBits[c] ) {
188 continue; // not lighted
189 }
190 hitIndexes[numIndexes] = a;
191 hitIndexes[numIndexes+1] = b;
192 hitIndexes[numIndexes+2] = c;
193 numIndexes += 3;
194 }
195
196 if ( !numIndexes ) {
197 continue;
198 }
199
200 qglEnableClientState( GL_TEXTURE_COORD_ARRAY );
201 qglTexCoordPointer( 2, GL_FLOAT, 0, texCoordsArray[0] );
202
203 qglEnableClientState( GL_COLOR_ARRAY );
204 qglColorPointer( 4, GL_UNSIGNED_BYTE, 0, colorArray );
205
206 //----(SA) creating dlight shader to allow for special blends or alternate dlight texture
207 {
208 shader_t *dls = dl->dlshader;
209 if ( dls ) {
210 for ( i = 0; i < dls->numUnfoggedPasses; i++ )
211 {
212 shaderStage_t *stage = dls->stages[i];
213 R_BindAnimatedImage( &dls->stages[i]->bundle[0] );
214 GL_State( stage->stateBits | GLS_DEPTHFUNC_EQUAL );
215 R_DrawElements( numIndexes, hitIndexes );
216 backEnd.pc.c_totalIndexes += numIndexes;
217 backEnd.pc.c_dlightIndexes += numIndexes;
218 }
219
220 } else
221 {
222 R_FogOff();
223
224 GL_Bind( tr.dlightImage );
225 // include GLS_DEPTHFUNC_EQUAL so alpha tested surfaces don't add light
226 // where they aren't rendered
227 GL_State( GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );
228 R_DrawElements( numIndexes, hitIndexes );
229 backEnd.pc.c_totalIndexes += numIndexes;
230 backEnd.pc.c_dlightIndexes += numIndexes;
231
232 // Ridah, overdraw lights several times, rather than sending
233 // multiple lights through
234 for ( i = 0; i < dl->overdraw; i++ ) {
235 R_DrawElements( numIndexes, hitIndexes );
236 backEnd.pc.c_totalIndexes += numIndexes;
237 backEnd.pc.c_dlightIndexes += numIndexes;
238 }
239
240 R_FogOn();
241 }
242 }
243 }
244 }
245
RB_CalcDiffuseColor_altivec(unsigned char * colors)246 void RB_CalcDiffuseColor_altivec( unsigned char *colors )
247 {
248 int i;
249 float *v, *normal;
250 trRefEntity_t *ent;
251 vec3_t lightDir;
252 int numVertexes;
253 vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff,
254 0x00, 0x00, 0x00, 0xff,
255 0x00, 0x00, 0x00, 0xff,
256 0x00, 0x00, 0x00, 0xff);
257 vector float ambientLightVec;
258 vector float directedLightVec;
259 vector float lightDirVec;
260 vector float normalVec0, normalVec1;
261 vector float incomingVec0, incomingVec1, incomingVec2;
262 vector float zero, jVec;
263 vector signed int jVecInt;
264 vector signed short jVecShort;
265 vector unsigned char jVecChar, normalPerm;
266 ent = backEnd.currentEntity;
267 // A lot of this could be simplified if we made sure
268 // entities light info was 16-byte aligned.
269 jVecChar = vec_lvsl(0, ent->ambientLight);
270 ambientLightVec = vec_ld(0, (vector float *)ent->ambientLight);
271 jVec = vec_ld(11, (vector float *)ent->ambientLight);
272 ambientLightVec = vec_perm(ambientLightVec,jVec,jVecChar);
273
274 jVecChar = vec_lvsl(0, ent->directedLight);
275 directedLightVec = vec_ld(0,(vector float *)ent->directedLight);
276 jVec = vec_ld(11,(vector float *)ent->directedLight);
277 directedLightVec = vec_perm(directedLightVec,jVec,jVecChar);
278
279 jVecChar = vec_lvsl(0, ent->lightDir);
280 lightDirVec = vec_ld(0,(vector float *)ent->lightDir);
281 jVec = vec_ld(11,(vector float *)ent->lightDir);
282 lightDirVec = vec_perm(lightDirVec,jVec,jVecChar);
283
284 zero = (vector float)vec_splat_s8(0);
285 VectorCopy( ent->lightDir, lightDir );
286
287 v = tess.xyz[0];
288 normal = tess.normal[0];
289
290 normalPerm = vec_lvsl(0,normal);
291 numVertexes = tess.numVertexes;
292 for (i = 0 ; i < numVertexes ; i++, v += 4, normal += 4) {
293 normalVec0 = vec_ld(0,(vector float *)normal);
294 normalVec1 = vec_ld(11,(vector float *)normal);
295 normalVec0 = vec_perm(normalVec0,normalVec1,normalPerm);
296 incomingVec0 = vec_madd(normalVec0, lightDirVec, zero);
297 incomingVec1 = vec_sld(incomingVec0,incomingVec0,4);
298 incomingVec2 = vec_add(incomingVec0,incomingVec1);
299 incomingVec1 = vec_sld(incomingVec1,incomingVec1,4);
300 incomingVec2 = vec_add(incomingVec2,incomingVec1);
301 incomingVec0 = vec_splat(incomingVec2,0);
302 incomingVec0 = vec_max(incomingVec0,zero);
303 normalPerm = vec_lvsl(12,normal);
304 jVec = vec_madd(incomingVec0, directedLightVec, ambientLightVec);
305 jVecInt = vec_cts(jVec,0); // RGBx
306 jVecShort = vec_pack(jVecInt,jVecInt); // RGBxRGBx
307 jVecChar = vec_packsu(jVecShort,jVecShort); // RGBxRGBxRGBxRGBx
308 jVecChar = vec_sel(jVecChar,vSel,vSel); // RGBARGBARGBARGBA replace alpha with 255
309 vec_ste((vector unsigned int)jVecChar,0,(unsigned int *)&colors[i*4]); // store color
310 }
311 }
312
/*
===================
LerpMeshVertexes_altivec

Decompresses the vertexes of an md3 surface into tess.xyz / tess.normal,
starting at slot tess.numVertexes.

surf     - surface whose packed vertex data (four shorts per vertex: x, y, z
           and a lat/long encoded normal) lives at surf->ofsXyzNormals
backlerp - weight of backEnd.currentEntity->e.oldframe; the current frame
           (e.frame) is weighted with 1 - backlerp

With backlerp == 0 only the current frame is read and the position scaling
is done with Altivec.  Otherwise both frames are blended with scalar code
and the resulting normals are renormalised by VectorArrayNormalize().
===================
*/
void LerpMeshVertexes_altivec(md3Surface_t *surf, float backlerp)
{
	short   *oldXyz, *newXyz, *oldNormals, *newNormals;
	float   *outXyz, *outNormal;
	float oldXyzScale QALIGN(16);
	float newXyzScale QALIGN(16);
	float oldNormalScale QALIGN(16);
	float newNormalScale QALIGN(16);
	int vertNum;
	unsigned lat, lng;
	int numVerts;

	outXyz = tess.xyz[tess.numVertexes];
	outNormal = tess.normal[tess.numVertexes];

	newXyz = (short *)((byte *)surf + surf->ofsXyzNormals)
		+ (backEnd.currentEntity->e.frame * surf->numVerts * 4);
	newNormals = newXyz + 3;

	newXyzScale = MD3_XYZ_SCALE * (1.0 - backlerp);
	newNormalScale = 1.0 - backlerp;

	numVerts = surf->numVerts;

	if ( backlerp == 0 ) {
		vector signed short newNormalsVec0;
		vector signed short newNormalsVec1;
		vector signed int newNormalsIntVec;
		vector float newNormalsFloatVec;
		vector float newXyzScaleVec;
		vector unsigned char newNormalsLoadPermute;
		vector unsigned char newNormalsStorePermute;
		vector float zero;

		// broadcast newXyzScale into all four lanes
		newXyzScaleVec = vec_ld(0,&newXyzScale);
		newXyzScaleVec = vec_perm(newXyzScaleVec,newXyzScaleVec,vec_lvsl(0,&newXyzScale));
		newXyzScaleVec = vec_splat(newXyzScaleVec,0);

		// outXyz advances by 16 bytes per vertex, so the rotation needed
		// for the element stores below is the same for every vertex
		newNormalsStorePermute = vec_lvsr(0,outXyz);
		zero = (vector float)vec_splat_s8(0);
		//
		// just copy the vertexes
		//
		for (vertNum=0 ; vertNum < numVerts ; vertNum++,
			newXyz += 4, newNormals += 4,
			outXyz += 4, outNormal += 4)
		{
			// the source advances by only 8 bytes per vertex, so its
			// alignment changes and the load permute is rebuilt every time
			newNormalsLoadPermute = vec_lvsl(0,newXyz);
			newNormalsVec0 = vec_ld(0,newXyz);
			// quadword holding the last byte of this 8-byte vertex; a
			// larger offset would needlessly touch the following quadword
			newNormalsVec1 = vec_ld(7,newXyz);
			newNormalsVec0 = vec_perm(newNormalsVec0,newNormalsVec1,newNormalsLoadPermute);
			// widen the four leading shorts (x, y, z, packed normal) and scale
			newNormalsIntVec = vec_unpackh(newNormalsVec0);
			newNormalsFloatVec = vec_ctf(newNormalsIntVec,0);
			newNormalsFloatVec = vec_madd(newNormalsFloatVec,newXyzScaleVec,zero);
			newNormalsFloatVec = vec_perm(newNormalsFloatVec,newNormalsFloatVec,newNormalsStorePermute);
			//outXyz[0] = newXyz[0] * newXyzScale;
			//outXyz[1] = newXyz[1] * newXyzScale;
			//outXyz[2] = newXyz[2] * newXyzScale;

			lat = ( newNormals[0] >> 8 ) & 0xff;
			lng = ( newNormals[0] & 0xff );
			lat *= (FUNCTABLE_SIZE/256);
			lng *= (FUNCTABLE_SIZE/256);

			// decode X as cos( lat ) * sin( long )
			// decode Y as sin( lat ) * sin( long )
			// decode Z as cos( long )

			outNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng];
			outNormal[1] = tr.sinTable[lat] * tr.sinTable[lng];
			outNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK];

			// three single-element stores write x, y and z; the fourth
			// lane (the scaled packed normal) is never stored
			vec_ste(newNormalsFloatVec,0,outXyz);
			vec_ste(newNormalsFloatVec,4,outXyz);
			vec_ste(newNormalsFloatVec,8,outXyz);
		}
	} else {
		//
		// interpolate and copy the vertex and normal
		//
		oldXyz = (short *)((byte *)surf + surf->ofsXyzNormals)
			+ (backEnd.currentEntity->e.oldframe * surf->numVerts * 4);
		oldNormals = oldXyz + 3;

		oldXyzScale = MD3_XYZ_SCALE * backlerp;
		oldNormalScale = backlerp;

		for (vertNum=0 ; vertNum < numVerts ; vertNum++,
			oldXyz += 4, newXyz += 4, oldNormals += 4, newNormals += 4,
			outXyz += 4, outNormal += 4)
		{
			vec3_t uncompressedOldNormal, uncompressedNewNormal;

			// interpolate the xyz
			outXyz[0] = oldXyz[0] * oldXyzScale + newXyz[0] * newXyzScale;
			outXyz[1] = oldXyz[1] * oldXyzScale + newXyz[1] * newXyzScale;
			outXyz[2] = oldXyz[2] * oldXyzScale + newXyz[2] * newXyzScale;

			// FIXME: interpolate lat/long instead?
			// The 8-bit angles are scaled to the sine table size the same
			// way as in the non-interpolating path above, rather than with
			// a literal that is only right for one table size.
			lat = ( newNormals[0] >> 8 ) & 0xff;
			lng = ( newNormals[0] & 0xff );
			lat *= (FUNCTABLE_SIZE/256);
			lng *= (FUNCTABLE_SIZE/256);
			uncompressedNewNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng];
			uncompressedNewNormal[1] = tr.sinTable[lat] * tr.sinTable[lng];
			uncompressedNewNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK];

			lat = ( oldNormals[0] >> 8 ) & 0xff;
			lng = ( oldNormals[0] & 0xff );
			lat *= (FUNCTABLE_SIZE/256);
			lng *= (FUNCTABLE_SIZE/256);

			uncompressedOldNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng];
			uncompressedOldNormal[1] = tr.sinTable[lat] * tr.sinTable[lng];
			uncompressedOldNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK];

			outNormal[0] = uncompressedOldNormal[0] * oldNormalScale + uncompressedNewNormal[0] * newNormalScale;
			outNormal[1] = uncompressedOldNormal[1] * oldNormalScale + uncompressedNewNormal[1] * newNormalScale;
			outNormal[2] = uncompressedOldNormal[2] * oldNormalScale + uncompressedNewNormal[2] * newNormalScale;
		}
		// the blended normals are no longer unit length; fix them up in one go
		VectorArrayNormalize((vec4_t *)tess.normal[tess.numVertexes], numVerts);
	}
}
440
441 #endif
442