1 /*
2 ===========================================================================
3 Copyright (C) 1999-2005 Id Software, Inc.
4 
5 This file is part of Quake III Arena source code.
6 
7 Quake III Arena source code is free software; you can redistribute it
8 and/or modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the License,
10 or (at your option) any later version.
11 
12 Quake III Arena source code is distributed in the hope that it will be
13 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Quake III Arena source code; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20 ===========================================================================
21 */
22 
23 /* This file is only compiled for PowerPC builds with Altivec support.
24    Altivec intrinsics need to be in a separate file, so GCC's -maltivec
25    command line can enable them, but give us the option to _not_ use that
26    on other files, where the compiler might then generate Altivec
27    instructions for normal floating point, crashing on G3 (etc) processors. */
28 
29 #include "tr_local.h"
30 
31 #if idppc_altivec
32 
33 #if !defined(__APPLE__)
34 #include <altivec.h>
35 #endif
36 
ProjectDlightTexture_altivec(void)37 void ProjectDlightTexture_altivec( void ) {
38 	int		i, l;
39 	vec_t	origin0, origin1, origin2;
40 	float   texCoords0, texCoords1;
41 	vector float floatColorVec0, floatColorVec1;
42 	vector float modulateVec, colorVec, zero;
43 	vector short colorShort;
44 	vector signed int colorInt;
45 	vector unsigned char floatColorVecPerm, modulatePerm, colorChar;
46 	vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff,
47                                                0x00, 0x00, 0x00, 0xff,
48                                                0x00, 0x00, 0x00, 0xff,
49                                                0x00, 0x00, 0x00, 0xff);
50 	float	*texCoords;
51 	byte	*colors;
52 	byte	clipBits[SHADER_MAX_VERTEXES];
53 	float	texCoordsArray[SHADER_MAX_VERTEXES][2];
54 	byte	colorArray[SHADER_MAX_VERTEXES][4];
55 	glIndex_t	hitIndexes[SHADER_MAX_INDEXES];
56 	int		numIndexes;
57 	float	scale;
58 	float	radius;
59 	vec3_t	floatColor;
60 	float	modulate = 0.0f;
61 
62 	if ( !backEnd.refdef.num_dlights ) {
63 		return;
64 	}
65 
66 	if ( backEnd.refdef.rdflags & RDF_SNOOPERVIEW ) {  // no dlights for snooper
67 		return;
68 	}
69 
70 	// There has to be a better way to do this so that floatColor
71 	// and/or modulate are already 16-byte aligned.
72 	floatColorVecPerm = vec_lvsl(0,(float *)floatColor);
73 	modulatePerm = vec_lvsl(0,(float *)&modulate);
74 	modulatePerm = (vector unsigned char)vec_splat((vector unsigned int)modulatePerm,0);
75 	zero = (vector float)vec_splat_s8(0);
76 
77 	for ( l = 0 ; l < backEnd.refdef.num_dlights ; l++ ) {
78 		dlight_t	*dl;
79 
80 		if ( !( tess.dlightBits & ( 1 << l ) ) ) {
81 			continue;	// this surface definately doesn't have any of this light
82 		}
83 		texCoords = texCoordsArray[0];
84 		colors = colorArray[0];
85 
86 		dl = &backEnd.refdef.dlights[l];
87 		origin0 = dl->transformed[0];
88 		origin1 = dl->transformed[1];
89 		origin2 = dl->transformed[2];
90 		radius = dl->radius;
91 		scale = 1.0f / radius;
92 
93 		if(r_greyscale->integer)
94 		{
95 			float luminance;
96 
97 			luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f;
98 			floatColor[0] = floatColor[1] = floatColor[2] = luminance;
99 		}
100 		else if(r_greyscale->value)
101 		{
102 			float luminance;
103 
104 			luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f;
105 			floatColor[0] = LERP(dl->color[0] * 255.0f, luminance, r_greyscale->value);
106 			floatColor[1] = LERP(dl->color[1] * 255.0f, luminance, r_greyscale->value);
107 			floatColor[2] = LERP(dl->color[2] * 255.0f, luminance, r_greyscale->value);
108 		}
109 		else
110 		{
111 			floatColor[0] = dl->color[0] * 255.0f;
112 			floatColor[1] = dl->color[1] * 255.0f;
113 			floatColor[2] = dl->color[2] * 255.0f;
114 		}
115 		floatColorVec0 = vec_ld(0, floatColor);
116 		floatColorVec1 = vec_ld(11, floatColor);
117 		floatColorVec0 = vec_perm(floatColorVec0,floatColorVec0,floatColorVecPerm);
118 		for ( i = 0 ; i < tess.numVertexes ; i++, texCoords += 2, colors += 4 ) {
119 			int		clip = 0;
120 			vec_t dist0, dist1, dist2;
121 
122 			dist0 = origin0 - tess.xyz[i][0];
123 			dist1 = origin1 - tess.xyz[i][1];
124 			dist2 = origin2 - tess.xyz[i][2];
125 
126 			backEnd.pc.c_dlightVertexes++;
127 
128 			texCoords0 = 0.5f + dist0 * scale;
129 			texCoords1 = 0.5f + dist1 * scale;
130 
131 			if( !r_dlightBacks->integer &&
132 					// dist . tess.normal[i]
133 					( dist0 * tess.normal[i][0] +
134 					dist1 * tess.normal[i][1] +
135 					dist2 * tess.normal[i][2] ) < 0.0f ) {
136 				clip = 63;
137 			} else {
138 				if ( texCoords0 < 0.0f ) {
139 					clip |= 1;
140 				} else if ( texCoords0 > 1.0f ) {
141 					clip |= 2;
142 				}
143 				if ( texCoords1 < 0.0f ) {
144 					clip |= 4;
145 				} else if ( texCoords1 > 1.0f ) {
146 					clip |= 8;
147 				}
148 				texCoords[0] = texCoords0;
149 				texCoords[1] = texCoords1;
150 
151 				// modulate the strength based on the height and color
152 				if ( dist2 > radius ) {
153 					clip |= 16;
154 					modulate = 0.0f;
155 				} else if ( dist2 < -radius ) {
156 					clip |= 32;
157 					modulate = 0.0f;
158 				} else {
159 					dist2 = Q_fabs(dist2);
160 					if ( dist2 < radius * 0.5f ) {
161 						modulate = 1.0f;
162 					} else {
163 						modulate = 2.0f * (radius - dist2) * scale;
164 					}
165 				}
166 			}
167 			clipBits[i] = clip;
168 
169 			modulateVec = vec_ld(0,(float *)&modulate);
170 			modulateVec = vec_perm(modulateVec,modulateVec,modulatePerm);
171 			colorVec = vec_madd(floatColorVec0,modulateVec,zero);
172 			colorInt = vec_cts(colorVec,0);	// RGBx
173 			colorShort = vec_pack(colorInt,colorInt);		// RGBxRGBx
174 			colorChar = vec_packsu(colorShort,colorShort);	// RGBxRGBxRGBxRGBx
175 			colorChar = vec_sel(colorChar,vSel,vSel);		// RGBARGBARGBARGBA replace alpha with 255
176 			vec_ste((vector unsigned int)colorChar,0,(unsigned int *)colors);	// store color
177 		}
178 
179 		// build a list of triangles that need light
180 		numIndexes = 0;
181 		for ( i = 0 ; i < tess.numIndexes ; i += 3 ) {
182 			int		a, b, c;
183 
184 			a = tess.indexes[i];
185 			b = tess.indexes[i+1];
186 			c = tess.indexes[i+2];
187 			if ( clipBits[a] & clipBits[b] & clipBits[c] ) {
188 				continue;	// not lighted
189 			}
190 			hitIndexes[numIndexes] = a;
191 			hitIndexes[numIndexes+1] = b;
192 			hitIndexes[numIndexes+2] = c;
193 			numIndexes += 3;
194 		}
195 
196 		if ( !numIndexes ) {
197 			continue;
198 		}
199 
200 		qglEnableClientState( GL_TEXTURE_COORD_ARRAY );
201 		qglTexCoordPointer( 2, GL_FLOAT, 0, texCoordsArray[0] );
202 
203 		qglEnableClientState( GL_COLOR_ARRAY );
204 		qglColorPointer( 4, GL_UNSIGNED_BYTE, 0, colorArray );
205 
206 		//----(SA) creating dlight shader to allow for special blends or alternate dlight texture
207 		{
208 			shader_t *dls = dl->dlshader;
209 			if ( dls ) {
210 				for ( i = 0; i < dls->numUnfoggedPasses; i++ )
211 				{
212 					shaderStage_t *stage = dls->stages[i];
213 					R_BindAnimatedImage( &dls->stages[i]->bundle[0] );
214 					GL_State( stage->stateBits | GLS_DEPTHFUNC_EQUAL );
215 					R_DrawElements( numIndexes, hitIndexes );
216 					backEnd.pc.c_totalIndexes += numIndexes;
217 					backEnd.pc.c_dlightIndexes += numIndexes;
218 				}
219 
220 			} else
221 			{
222 				R_FogOff();
223 
224 				GL_Bind( tr.dlightImage );
225 				// include GLS_DEPTHFUNC_EQUAL so alpha tested surfaces don't add light
226 				// where they aren't rendered
227 				GL_State( GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );
228 				R_DrawElements( numIndexes, hitIndexes );
229 				backEnd.pc.c_totalIndexes += numIndexes;
230 				backEnd.pc.c_dlightIndexes += numIndexes;
231 
232 				// Ridah, overdraw lights several times, rather than sending
233 				//	multiple lights through
234 				for ( i = 0; i < dl->overdraw; i++ ) {
235 					R_DrawElements( numIndexes, hitIndexes );
236 					backEnd.pc.c_totalIndexes += numIndexes;
237 					backEnd.pc.c_dlightIndexes += numIndexes;
238 				}
239 
240 				R_FogOn();
241 			}
242 		}
243 	}
244 }
245 
RB_CalcDiffuseColor_altivec(unsigned char * colors)246 void RB_CalcDiffuseColor_altivec( unsigned char *colors )
247 {
248 	int				i;
249 	float			*v, *normal;
250 	trRefEntity_t	*ent;
251 	vec3_t			lightDir;
252 	int				numVertexes;
253 	vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff,
254                                                0x00, 0x00, 0x00, 0xff,
255                                                0x00, 0x00, 0x00, 0xff,
256                                                0x00, 0x00, 0x00, 0xff);
257 	vector float ambientLightVec;
258 	vector float directedLightVec;
259 	vector float lightDirVec;
260 	vector float normalVec0, normalVec1;
261 	vector float incomingVec0, incomingVec1, incomingVec2;
262 	vector float zero, jVec;
263 	vector signed int jVecInt;
264 	vector signed short jVecShort;
265 	vector unsigned char jVecChar, normalPerm;
266 	ent = backEnd.currentEntity;
267 	// A lot of this could be simplified if we made sure
268 	// entities light info was 16-byte aligned.
269 	jVecChar = vec_lvsl(0, ent->ambientLight);
270 	ambientLightVec = vec_ld(0, (vector float *)ent->ambientLight);
271 	jVec = vec_ld(11, (vector float *)ent->ambientLight);
272 	ambientLightVec = vec_perm(ambientLightVec,jVec,jVecChar);
273 
274 	jVecChar = vec_lvsl(0, ent->directedLight);
275 	directedLightVec = vec_ld(0,(vector float *)ent->directedLight);
276 	jVec = vec_ld(11,(vector float *)ent->directedLight);
277 	directedLightVec = vec_perm(directedLightVec,jVec,jVecChar);
278 
279 	jVecChar = vec_lvsl(0, ent->lightDir);
280 	lightDirVec = vec_ld(0,(vector float *)ent->lightDir);
281 	jVec = vec_ld(11,(vector float *)ent->lightDir);
282 	lightDirVec = vec_perm(lightDirVec,jVec,jVecChar);
283 
284 	zero = (vector float)vec_splat_s8(0);
285 	VectorCopy( ent->lightDir, lightDir );
286 
287 	v = tess.xyz[0];
288 	normal = tess.normal[0];
289 
290 	normalPerm = vec_lvsl(0,normal);
291 	numVertexes = tess.numVertexes;
292 	for (i = 0 ; i < numVertexes ; i++, v += 4, normal += 4) {
293 		normalVec0 = vec_ld(0,(vector float *)normal);
294 		normalVec1 = vec_ld(11,(vector float *)normal);
295 		normalVec0 = vec_perm(normalVec0,normalVec1,normalPerm);
296 		incomingVec0 = vec_madd(normalVec0, lightDirVec, zero);
297 		incomingVec1 = vec_sld(incomingVec0,incomingVec0,4);
298 		incomingVec2 = vec_add(incomingVec0,incomingVec1);
299 		incomingVec1 = vec_sld(incomingVec1,incomingVec1,4);
300 		incomingVec2 = vec_add(incomingVec2,incomingVec1);
301 		incomingVec0 = vec_splat(incomingVec2,0);
302 		incomingVec0 = vec_max(incomingVec0,zero);
303 		normalPerm = vec_lvsl(12,normal);
304 		jVec = vec_madd(incomingVec0, directedLightVec, ambientLightVec);
305 		jVecInt = vec_cts(jVec,0);	// RGBx
306 		jVecShort = vec_pack(jVecInt,jVecInt);		// RGBxRGBx
307 		jVecChar = vec_packsu(jVecShort,jVecShort);	// RGBxRGBxRGBxRGBx
308 		jVecChar = vec_sel(jVecChar,vSel,vSel);		// RGBARGBARGBARGBA replace alpha with 255
309 		vec_ste((vector unsigned int)jVecChar,0,(unsigned int *)&colors[i*4]);	// store color
310 	}
311 }
312 
/*
===================
LerpMeshVertexes_altivec

Decompresses the vertexes of an MD3 surface for the current entity into
tess.xyz / tess.normal, starting at index tess.numVertexes.

surf		surface whose packed vertex data (four shorts per vertex: x, y, z and
			a lat/long encoded normal) lives at surf->ofsXyzNormals
backlerp	weight of the entity's old frame; 0 means "use the new frame only",
			which takes the AltiVec fast path.  Any other value blends old
			and new frames with scalar code and renormalizes the normals.

tess.numVertexes itself is not modified here.
===================
*/
void LerpMeshVertexes_altivec(md3Surface_t *surf, float backlerp)
{
	short	*oldXyz, *newXyz, *oldNormals, *newNormals;
	float	*outXyz, *outNormal;
	float	oldXyzScale QALIGN(16);
	float   newXyzScale QALIGN(16);
	float	oldNormalScale QALIGN(16);
	float newNormalScale QALIGN(16);
	int		vertNum;
	unsigned lat, lng;
	int		numVerts;

	outXyz = tess.xyz[tess.numVertexes];
	outNormal = tess.normal[tess.numVertexes];

	newXyz = (short *)((byte *)surf + surf->ofsXyzNormals)
		+ (backEnd.currentEntity->e.frame * surf->numVerts * 4);
	newNormals = newXyz + 3;	// fourth short of each vertex is the packed normal

	newXyzScale = MD3_XYZ_SCALE * (1.0 - backlerp);
	newNormalScale = 1.0 - backlerp;

	numVerts = surf->numVerts;

	if ( backlerp == 0 ) {
		vector signed short newNormalsVec0;
		vector signed short newNormalsVec1;
		vector signed int newNormalsIntVec;
		vector float newNormalsFloatVec;
		vector float newXyzScaleVec;
		vector unsigned char newNormalsLoadPermute;
		vector unsigned char newNormalsStorePermute;
		vector float zero;

		// splat newXyzScale into all four lanes; the permute is derived from
		// the address of the scalar that is actually being loaded
		newNormalsStorePermute = vec_lvsl(0,(float *)&newXyzScale);
		newXyzScaleVec = *(vector float *)&newXyzScale;
		newXyzScaleVec = vec_perm(newXyzScaleVec,newXyzScaleVec,newNormalsStorePermute);
		newXyzScaleVec = vec_splat(newXyzScaleVec,0);
		zero = (vector float)vec_splat_s8(0);
		//
		// just copy the vertexes
		//
		for (vertNum=0 ; vertNum < numVerts ; vertNum++,
			newXyz += 4, newNormals += 4,
			outXyz += 4, outNormal += 4)
		{
			newNormalsLoadPermute = vec_lvsl(0,newXyz);
			newNormalsStorePermute = vec_lvsr(0,outXyz);
			// unaligned load of the 8-byte vertex: only the quadwords holding
			// its first and last byte are touched, so nothing beyond the
			// vertex's own data is required to be readable
			newNormalsVec0 = vec_ld(0,newXyz);
			newNormalsVec1 = vec_ld(7,newXyz);
			newNormalsVec0 = vec_perm(newNormalsVec0,newNormalsVec1,newNormalsLoadPermute);
			// widen the first four shorts to ints, convert to float and scale
			newNormalsIntVec = vec_unpackh(newNormalsVec0);
			newNormalsFloatVec = vec_ctf(newNormalsIntVec,0);
			newNormalsFloatVec = vec_madd(newNormalsFloatVec,newXyzScaleVec,zero);
			// rotate so each lane sits where vec_ste will pick it up for outXyz
			newNormalsFloatVec = vec_perm(newNormalsFloatVec,newNormalsFloatVec,newNormalsStorePermute);
			//outXyz[0] = newXyz[0] * newXyzScale;
			//outXyz[1] = newXyz[1] * newXyzScale;
			//outXyz[2] = newXyz[2] * newXyzScale;

			// high byte is latitude, low byte is longitude; scale both to
			// sine table indexes
			lat = ( newNormals[0] >> 8 ) & 0xff;
			lng = ( newNormals[0] & 0xff );
			lat *= (FUNCTABLE_SIZE/256);
			lng *= (FUNCTABLE_SIZE/256);

			// decode X as cos( lat ) * sin( long )
			// decode Y as sin( lat ) * sin( long )
			// decode Z as cos( long )

			outNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng];
			outNormal[1] = tr.sinTable[lat] * tr.sinTable[lng];
			outNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK];

			// store x, y and z one element at a time; the fourth float is left untouched
			vec_ste(newNormalsFloatVec,0,outXyz);
			vec_ste(newNormalsFloatVec,4,outXyz);
			vec_ste(newNormalsFloatVec,8,outXyz);
		}
	} else {
		//
		// interpolate and copy the vertex and normal
		//
		oldXyz = (short *)((byte *)surf + surf->ofsXyzNormals)
			+ (backEnd.currentEntity->e.oldframe * surf->numVerts * 4);
		oldNormals = oldXyz + 3;

		oldXyzScale = MD3_XYZ_SCALE * backlerp;
		oldNormalScale = backlerp;

		for (vertNum=0 ; vertNum < numVerts ; vertNum++,
			oldXyz += 4, newXyz += 4, oldNormals += 4, newNormals += 4,
			outXyz += 4, outNormal += 4)
		{
			vec3_t uncompressedOldNormal, uncompressedNewNormal;

			// interpolate the xyz
			outXyz[0] = oldXyz[0] * oldXyzScale + newXyz[0] * newXyzScale;
			outXyz[1] = oldXyz[1] * oldXyzScale + newXyz[1] * newXyzScale;
			outXyz[2] = oldXyz[2] * oldXyzScale + newXyz[2] * newXyzScale;

			// FIXME: interpolate lat/long instead?
			// scale by the table size (not a literal 4) so this path decodes
			// normals exactly like the non-interpolating path above
			lat = ( newNormals[0] >> 8 ) & 0xff;
			lng = ( newNormals[0] & 0xff );
			lat *= (FUNCTABLE_SIZE/256);
			lng *= (FUNCTABLE_SIZE/256);
			uncompressedNewNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng];
			uncompressedNewNormal[1] = tr.sinTable[lat] * tr.sinTable[lng];
			uncompressedNewNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK];

			lat = ( oldNormals[0] >> 8 ) & 0xff;
			lng = ( oldNormals[0] & 0xff );
			lat *= (FUNCTABLE_SIZE/256);
			lng *= (FUNCTABLE_SIZE/256);

			uncompressedOldNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng];
			uncompressedOldNormal[1] = tr.sinTable[lat] * tr.sinTable[lng];
			uncompressedOldNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK];

			outNormal[0] = uncompressedOldNormal[0] * oldNormalScale + uncompressedNewNormal[0] * newNormalScale;
			outNormal[1] = uncompressedOldNormal[1] * oldNormalScale + uncompressedNewNormal[1] * newNormalScale;
			outNormal[2] = uncompressedOldNormal[2] * oldNormalScale + uncompressedNewNormal[2] * newNormalScale;

//			VectorNormalize (outNormal);
		}
		// blended normals are no longer unit length; renormalize the whole batch at once
    	VectorArrayNormalize((vec4_t *)tess.normal[tess.numVertexes], numVerts);
   	}
}
440 
441 #endif
442