1 //////////////////////////////////////////////////////////////////////
2 //
3 //                             Pixie
4 //
5 // Copyright � 1999 - 2003, Okan Arikan
6 //
7 // Contact: okan@cs.utexas.edu
8 //
9 //	This library is free software; you can redistribute it and/or
10 //	modify it under the terms of the GNU Lesser General Public
11 //	License as published by the Free Software Foundation; either
12 //	version 2.1 of the License, or (at your option) any later version.
13 //
14 //	This library is distributed in the hope that it will be useful,
15 //	but WITHOUT ANY WARRANTY; without even the implied warranty of
16 //	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 //	Lesser General Public License for more details.
18 //
19 //	You should have received a copy of the GNU Lesser General Public
20 //	License along with this library; if not, write to the Free Software
21 //	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
22 //
23 ///////////////////////////////////////////////////////////////////////
24 ///////////////////////////////////////////////////////////////////////
25 //
26 //  File				:	stochastic.cpp
27 //  Classes				:	CStochastic
28 //  Description			:	implements the stochastic hider
29 //
30 ////////////////////////////////////////////////////////////////////////
31 #include <math.h>
32 
33 #include "stochastic.h"
34 #include "memory.h"
35 #include "random.h"
36 
37 
38 // This macro is used to allocate fragments
39 #define	newFragment(__a)	if (freeFragments == NULL)	{						\
40 								__a					=	new CFragment;			\
41 								if (CRenderer::numExtraSamples > 0) {							\
42 									__a->extraSamples = new float[CRenderer::numExtraSamples]; 	\
43 								}																\
44 							} else {											\
45 								__a					=	freeFragments;			\
46 								freeFragments		=	freeFragments->next;	\
47 							}													\
48 							numFragments++;
49 
50 // And deallocate macro
51 #define	deleteFragment(__a)	__a->next				=	freeFragments;			\
52 							freeFragments			=	__a;					\
53 							numFragments--;
54 
55 
56 ///////////////////////////////////////////////////////////////////////
57 // Class				:	CStochastic
58 // Method				:	CStochastic
59 // Description			:	Ctor
60 // Return Value			:	-
61 // Comments				:
CStochastic(int thread)62 CStochastic::CStochastic(int thread) : CReyes(thread), COcclusionCuller(), apertureGenerator(CRenderer::frame) {
63 	int		i,j;
64 	float	*cExtraSample;
65 	CPixel	*cPixel;
66 
67 	// The maximum width/height we should handle
68 	totalWidth		=	CRenderer::pixelXsamples*CRenderer::bucketWidth + 2*CRenderer::xSampleOffset;
69 	totalHeight		=	CRenderer::pixelYsamples*CRenderer::bucketHeight + 2*CRenderer::ySampleOffset;
70 
71 	// Allocate the framebuffer for extra samples (checkpointed)
72 	if (CRenderer::numExtraSamples > 0)	extraSampleMemory	=	(float *) ralloc(totalWidth*totalHeight*CRenderer::numExtraSamples*sizeof(float),CRenderer::globalMemory);
73 	else								extraSampleMemory	=	NULL;
74 
75 	// Allocate the pixels (checkpointed)
76 	cExtraSample	=	extraSampleMemory;
77 	fb				=	(CPixel **) ralloc(totalHeight*sizeof(CPixel *),CRenderer::globalMemory);
78 	for (i=0;i<totalHeight;i++) {
79 		cPixel		=	fb[i]		=	 (CPixel *) ralloc(totalWidth*sizeof(CPixel),CRenderer::globalMemory);
80 
81 		for (j=totalWidth;j>0;j--,cPixel++,cExtraSample+=CRenderer::numExtraSamples) {
82 			cPixel->last.extraSamples	=	cExtraSample;
83 			cPixel->first.extraSamples	=	NULL;
84 		}
85 	}
86 
87 	// Init the fragment buffer
88 	freeFragments	=	NULL;
89 	numFragments	=	0;
90 
91 	// Initialize the occlusion culler
92 	initCuller(max(totalWidth,totalHeight), &maxDepth);
93 }
94 
95 ///////////////////////////////////////////////////////////////////////
96 // Class				:	CStochastic
97 // Method				:	~CStochastic
98 // Description			:	Dtor
99 // Return Value			:	-
100 // Comments				:
~CStochastic()101 CStochastic::~CStochastic() {
102 	CFragment	*cFragment;
103 
104 	// Ditch the extra fragments
105 	while((cFragment = freeFragments) != NULL) {
106 		freeFragments	=	cFragment->next;
107 		if (CRenderer::numExtraSamples > 0) {
108 			delete[] cFragment->extraSamples;
109 		}
110 		delete cFragment;
111 	}
112 }
113 
114 ///////////////////////////////////////////////////////////////////////
115 // Class				:	CStochastic
116 // Method				:	rasterBegin
117 // Description			:	Begin drawing an image
118 // Return Value			:	-
119 // Comments				:
rasterBegin(int w,int h,int l,int t,int nullBucket)120 void		CStochastic::rasterBegin(int w,int h,int l,int t,int nullBucket) {
121 	int			i,j,pxi,pxj;
122 	float		zoldStart;
123 	CFragment	*cFragment;
124 
125 	assert(numFragments == 0);
126 
127 	zoldStart			=	CRenderer::clipMax;
128 
129 	// Set the digits
130 	width				=	w;
131 	height				=	h;
132 	left				=	l;
133 	top					=	t;
134 	sampleWidth			=	width*CRenderer::pixelXsamples + 2*CRenderer::xSampleOffset;
135 	sampleHeight		=	height*CRenderer::pixelYsamples + 2*CRenderer::ySampleOffset;
136 	right				=	left + sampleWidth;
137 	bottom				=	top + sampleHeight;
138 
139 	// Early-out if we have no data
140 	if (!(CRenderer::flags & OPTIONS_FLAGS_DEEP_SHADOW_RENDERING) && nullBucket) return;
141 
142 	assert(sampleWidth <= totalWidth);
143 	assert(sampleHeight <= totalHeight);
144 
145 	// Init the occlusion culler to zero
146 	initToZero();
147 	for (i=0,pxi=CRenderer::pixelYsamples-CRenderer::ySampleOffset;i<sampleHeight;i++,pxi++) {
148 		CPixel	*pixel	=	fb[i];
149 
150 		if (pxi >= CRenderer::pixelYsamples)	pxi = 0;
151 
152 		for (j=0,pxj=CRenderer::pixelXsamples-CRenderer::xSampleOffset;j<sampleWidth;j++,pxj++,pixel++) {
153 			float	aperture[2];
154 
155 			// The stratified sample
156 			pixel->jx					=	(CRenderer::jitter*(urand()-0.5f) + 0.5001011f);
157 			pixel->jy					=	(CRenderer::jitter*(urand()-0.5f) + 0.5001017f);
158 
159 			// Time of the sample for motion blur
160 			if (pxj >= CRenderer::pixelXsamples)	pxj = 0;
161 			pixel->jt					=	( pxi*CRenderer::pixelXsamples + pxj + CRenderer::jitter*(urand()-0.5f) + 0.5001011f)/(float)(CRenderer::pixelXsamples*CRenderer::pixelYsamples);
162 
163 			// Importance blend / jitter
164 			pixel->jimp					=	1.0f - ( pxj*CRenderer::pixelYsamples + pxi + CRenderer::jitter*(urand()-0.5f) + 0.5001011f)/(float)(CRenderer::pixelXsamples*CRenderer::pixelYsamples);
165 
166 			if (CRenderer::flags & OPTIONS_FLAGS_FOCALBLUR) {
167 
168 				// Aperture sample for depth of field
169 				while (TRUE) {
170 					apertureGenerator.get(aperture);
171 					aperture[0] 			= 2.0f*aperture[0] - 1.0f;
172 					aperture[1] 			= 2.0f*aperture[1] - 1.0f;
173 					if ((aperture[0]*aperture[0] + aperture[1]*aperture[1]) < 1.0f) break;
174 				}
175 
176 				pixel->jdx					=	aperture[0];
177 				pixel->jdy					=	aperture[1];
178 			} else {
179 				pixel->jdx					=	0;
180 				pixel->jdy					=	0;
181 			}
182 
183 			// Center location of the sample
184 			pixel->xcent				=	(j+pixel->jx) + left;
185 			pixel->ycent				=	(i+pixel->jy) + top;
186 
187 			pixel->z					=	CRenderer::clipMax;
188 			pixel->zold					=	zoldStart;
189 			pixel->numSplats			=	0;
190 			pixel->node					=	getNode(j,i);
191 			pixel->node->zmax			=	CRenderer::clipMax;
192 
193 
194 			cFragment					=	&pixel->last;
195 			cFragment->z				=	CRenderer::clipMax;
196 			initv(cFragment->color,0);
197 			initv(cFragment->opacity,0);
198 			cFragment->next				=	NULL;
199 			cFragment->prev				=	&pixel->first;
200 			// The last sample's extra samples are genuine AOV data
201 			if (CRenderer::numExtraSamples > 0)
202 				memcpy(cFragment->extraSamples,CRenderer::sampleDefaults,sizeof(float)*CRenderer::numExtraSamples);
203 			initv(cFragment->accumulatedOpacity,0);
204 
205 
206 			cFragment					=	&pixel->first;
207 			cFragment->z				=	-C_INFINITY;
208 			initv(cFragment->color,0);
209 			initv(cFragment->opacity,0);
210 			cFragment->next				=	&pixel->last;
211 			cFragment->prev				=	NULL;
212 			// Note: The first fragment's extra samples are not used, and the pointer is NULL
213 			assert(cFragment->extraSamples == NULL);
214 			initv(cFragment->accumulatedOpacity,0);
215 
216 			pixel->update				=	&pixel->first;
217 		}
218 	}
219 
220 	resetHierarchy();
221 }
222 
223 ///////////////////////////////////////////////////////////////////////
224 // Class				:	CStochastic
225 // Method				:	rasterDrawPrimitives
226 // Description			:	Draw bunch of primitives
227 // Return Value			:	-
228 // Comments				:
rasterDrawPrimitives(CRasterGrid * grid)229 void		CStochastic::rasterDrawPrimitives(CRasterGrid *grid) {
230 
231 // Instantiate the dispatch switch
232 #define DEFINE_STOCHASTIC_SWITCH
233 	#include "stochasticPrimitives.h"
234 #undef DEFINE_STOCHASTIC_SWITCH
235 }
236 
237 
238 
239 // The following macros help various fragment operations
240 #define depthFilterIfZMin()
241 #define depthFilterElseZMin()
242 #define depthFilterTouchNodeZMin()	touchNode(pixel->node,z);
243 
244 #define depthFilterIfZMid()			pixel->zold		=	pixel->z;
245 #define depthFilterElseZMid()		else {	pixel->zold	=	min(pixel->zold,z);	}
246 #define depthFilterTouchNodeZMid()	touchNode(pixel->node,pixel->zold);
247 
248 
249 // This macro is used to insert a fragment into the linked list for a pixel
250 #define	findSample(__dest,__z) { 																	\
251 	CFragment *lSample	=	pixel->update;															\
252 	if (__z >= lSample->z)	{																		\
253 		CFragment		*cSample;																	\
254 		for (cSample=lSample->next;__z >= cSample->z;lSample=cSample,cSample=cSample->next);		\
255 		assert(__z >= lSample->z);																	\
256 		assert(__z <= cSample->z);																	\
257 		newFragment(__dest);																		\
258 		__dest->next	=	cSample;																\
259 		__dest->prev	=	lSample;																\
260 		cSample->prev	=	__dest;																	\
261 		lSample->next	=	__dest;																	\
262 	} else {																						\
263 		CFragment		*cSample;																	\
264 		for (cSample=lSample->prev;__z < cSample->z;lSample=cSample,cSample=cSample->prev);			\
265 		assert(__z >= cSample->z);																	\
266 		assert(__z <= lSample->z);																	\
267 		newFragment(__dest);																		\
268 		__dest->next	=	lSample;																\
269 		__dest->prev	=	cSample;																\
270 		cSample->next	=	__dest;																	\
271 		lSample->prev	=	__dest;																	\
272 	}																								\
273 	pixel->update	=	__dest;																		\
274 }
275 
276 // This macro is called when an opaque fragment is inserted
277 // Note: On the assumption that the opacity really is nearly opaque, we don't really need
278 // to bother messing with pixel->last though it might technicaly be more correct to do so
279 // so these sections are commented out in updateOpaque and updateTransparent
280 
281 
282 #define updateOpaque() {																			\
283 	CFragment *cSample=pixel->last.prev;															\
284 	while(cSample->z > z) {																			\
285 		CFragment *nSample	=	cSample->prev;														\
286 		nSample->next		=	&pixel->last;														\
287 		pixel->last.prev	=	nSample;															\
288 		assert(cSample != &pixel->first);															\
289 		deleteFragment(cSample);																	\
290 		cSample				=	nSample;															\
291 	}																								\
292 	/*initv(pixel->last.accumulatedOpacity,1);*/													\
293 	pixel->update			=	cSample;															\
294 }
295 
296 // Note: due to the way we insert samples, we may have inserted a new one behind the
297 // maximum opaque depth - in which case we must flush the new sample and everything
298 // beind it.  Otherwise, we need to update accumulated opacity, and cull samples
299 // behind the point where we become opaque
300 
301 #define debugTransparencyStack(cSample) {						\
302 	printf(">> cull opac %.6f %.6f %.6f\n",O[0],O[1],O[2]);		\
303 	CFragment *ds=cSample;										\
304 	while(ds) {													\
305 		printf("opac %.6f %.6f %.6f\tropac %.6f %.6f %.6f",ds->opacity[0],ds->opacity[1],ds->opacity[2],	\
306 			ds->accumulatedOpacity[0],ds->accumulatedOpacity[1],ds->accumulatedOpacity[2]);					\
307 		if(ds==nSample) {										\
308 			if(ds==&pixel->last) printf("*");					\
309 			printf("*\n");										\
310 		} else {												\
311 			printf("\n");										\
312 		}														\
313 		ds = ds->prev;											\
314 	}															\
315 	printf("\n");												\
316 }
317 
318 #define updateTransparent(dfIf,dfElse) {															\
319 	vector O,rO;																					\
320 	const float *Oc;																				\
321 	CFragment *cSample	=	nSample->prev;															\
322 	movvv(O,cSample->accumulatedOpacity);															\
323 	if (O[0] < CRenderer::opacityThreshold[0] && O[1] < CRenderer::opacityThreshold[1] && O[2] < CRenderer::opacityThreshold[2]) {	\
324 		/* not already opaque */																	\
325 		cSample = nSample;																			\
326 	}																								\
327 	/* adjust accumulated opacities and test against threshold */									\
328 	initv(rO,1-O[0],1-O[1],1-O[2]);																	\
329 	while(cSample) {																				\
330 		Oc = cSample->opacity;																		\
331 		if (Oc[0] < 0 || Oc[1] < 0 || Oc[2] < 0) {													\
332 			rO[0] *= 1+Oc[0];																		\
333 			rO[1] *= 1+Oc[1];																		\
334 			rO[2] *= 1+Oc[2];																		\
335 		} else {																					\
336 			O[0] += Oc[0]*rO[0];																	\
337 			O[1] += Oc[1]*rO[1];																	\
338 			O[2] += Oc[2]*rO[2];																	\
339 			rO[0] *= 1-Oc[0];																		\
340 			rO[1] *= 1-Oc[1];																		\
341 			rO[2] *= 1-Oc[2];																		\
342 		}																							\
343 		movvv(cSample->accumulatedOpacity,O);														\
344 																									\
345 		if (O[0] > CRenderer::opacityThreshold[0] && O[1] > CRenderer::opacityThreshold[1] && O[2] > CRenderer::opacityThreshold[2]) {	\
346 			/* opaque after this point */															\
347 			CFragment *dSample	=	cSample->next;													\
348 			if (dSample && dSample != &pixel->last) {												\
349 				while(dSample && dSample != &pixel->last) {											\
350 					CFragment *tSample	=	dSample->next;											\
351 					deleteFragment(dSample);														\
352 					dSample				=	tSample;												\
353 				}																					\
354 				cSample->next		=	&pixel->last;												\
355 				pixel->last.prev	=	cSample;													\
356 				pixel->update		=	cSample;													\
357 				/*initv(pixel->last.color,0);				*/	\
358 				/*initv(pixel->last.opacity,0);				*/	\
359 				/*initv(pixel->last.accumulatedOpacity,1);	*/	\
360 				/*pixel->last.z = CRenderer::clipMax;		*/	\
361 				/*initv(cSample->accumulatedOpacity,1);		*/	\
362 			}																						\
363 			const float z			=	cSample->z;													\
364 			if (z < pixel->z) {																		\
365 				dfIf();																				\
366 				pixel->z			=	z;															\
367 				depthFilterTouchNode();																\
368 			} dfElse();																				\
369 			break;																					\
370 		}																							\
371 		cSample = cSample->next;																	\
372 	}																								\
373 }
374 
375 #define DEFINE_STOCHASTIC_FUNCTIONS
376 #include "stochasticPrimitives.h"
377 #undef DEFINE_STOCHASTIC_FUNCTIONS
378 
379 #undef depthFilterIfZMin
380 #undef depthFilterElseZMin
381 #undef depthFilterIfZMid
382 #undef depthFilterElseZMid
383 #undef findSample
384 #undef updateOpaque
385 
386 
387 ///////////////////////////////////////////////////////////////////////
388 // Class				:	CStochastic
389 // Method				:	rasterEnd
390 // Description			:	Get the image from the screen
391 // Return Value			:	-
392 // Comments				:
rasterEnd(float * fb2,int noObjects)393 void		CStochastic::rasterEnd(float *fb2,int noObjects) {
394 	int				i;
395 	const int		xres					=	width;
396 	const int		yres					=	height;
397 	float			*tmp;
398 
399 
400 	// Deep shadow map computation
401 	if (CRenderer::flags & OPTIONS_FLAGS_DEEP_SHADOW_RENDERING)	deepShadowCompute();
402 	else if (noObjects) {
403 		// early-out if we have no data
404 
405 		// initialize the default samples and also the extra samples using "sampleDefaults"
406 		for (tmp=fb2,i=xres*yres;i>0;i--) {
407 			*tmp++	=	0;					// r
408 			*tmp++	=	0;					// g
409 			*tmp++	=	0;					// b
410 			*tmp++	=	0;					// a
411 			*tmp++	=	C_INFINITY;			// z
412 
413 			// default-fill extra samples
414 			if (CRenderer::numExtraSamples > 0) {
415 				memcpy(tmp,CRenderer::sampleDefaults,CRenderer::numExtraSamples*sizeof(float));
416 
417 				tmp += CRenderer::numExtraSamples;
418 			}
419 		}
420 
421 
422 		return;
423 	}
424 
425 	memBegin(threadMemory);
426 
427 
428 	// Collapse the samples (transparency composite)
429 	const int		numExtraNonCompChannels		=	CRenderer::numExtraNonCompChannels;
430 	const int		numExtraCompChannels		=	CRenderer::numExtraCompChannels;
431 
432 	// pull local for speed
433 	vector			zvisibilityThreshold;
434 	movvv(zvisibilityThreshold,CRenderer::zvisibilityThreshold);
435 
436 	const int		filterWidth				=	CRenderer::pixelXsamples + 2*CRenderer::xSampleOffset;
437 	const int		filterHeight			=	CRenderer::pixelYsamples + 2*CRenderer::ySampleOffset;
438 	const float		halfFilterWidth			=	filterWidth*0.5f;
439 	const float		halfFilterHeight		=	filterHeight*0.5f;
440 	const int		pixelSize				=	6	+ CRenderer::numExtraSamples;	// alpha + depth + color + opacity + extra samples
441 	float			*fbs					=	(float *) ralloc(totalWidth*totalHeight*pixelSize*sizeof(float),threadMemory);
442 	const int		sampleLineDisplacement	=	CRenderer::pixelXsamples*pixelSize;
443 	int				sx,sy;
444 
445 	// 0	=	alpha
446 	// 1	=	z;
447 	// 2-4	=	color
448 	// 5	=	z2
449 	for (int y=0;y<sampleHeight;y++) {
450 		CPixel	*cPixel		=	fb[y];
451 		float	*cFb		=	&fbs[y*totalWidth*pixelSize];
452 		vector	ropacity;
453 
454 		for (i=sampleWidth;i>0;i--,cPixel++,cFb+=pixelSize) {
455 			CFragment	*cSample;
456 			CFragment	*oSample;
457 			float		*Z			=	&cFb[1];
458 			float		*C			=	&cFb[2];
459 			//float		*O			=	&cFb[5];
460 			vector		O;
461 			float		*Z2			=	&cFb[5];
462 			float		*ES			=	&cFb[6];
463 
464 			assert(cPixel->first.z == -C_INFINITY);
465 
466 			// We re-use cPixel->first as a marker as to whether the pixel has any matte samples,
467 			// cPixel->last ise really used, but cPixel->first is not (it's always skipped in the composite),
468 			// so this is safe to do
469 
470 			///////////////////////////////////////////////
471 			// Opacity thresholding for non composited aovs
472 			///////////////////////////////////////////////
473 
474 			{
475 			// Q: Why are we recalculating z
476 			// A: because maintaining an accurate z for transparent samples
477 			//    combined with zthreshold is very awkward.
478 			//    We desire z to have the same evaluation properties as a zmin
479 			//    aov, which will account for transparent samples.  So we pass
480 			//    thru to grab the right value.  In fully opaque scenes this
481 			//    should not add significant additional workload
482 
483 			#define NonCompositeSampleLoop() 											\
484 				const float *sampleExtra	= cSample->extraSamples;					\
485 				for(int es = 0; es < numExtraNonCompChannels; es++) {					\
486 					const int sampleOffset	= CRenderer::nonCompChannelOrder[es*4];		\
487 					const int numSamples	= CRenderer::nonCompChannelOrder[es*4+1];
488 
489 			#define copyNonCompSamples(src)												\
490 				float *ESD				= ES + sampleOffset;							\
491 				const float *ESS		= src;											\
492 				for(int ess=numSamples;ess>0;ess--) *ESD++ = *ESS++;
493 
494 			#define checkZThreshold()		(opacity[0] > zvisibilityThreshold[0]) || (opacity[1] > zvisibilityThreshold[1]) || (opacity[2] > zvisibilityThreshold[2])
495 			#define checkMatteZThreshold()	(1+opacity[0] > zvisibilityThreshold[0]) || (1+opacity[1] > zvisibilityThreshold[1]) || (1+opacity[2] > zvisibilityThreshold[2])
496 
497 			cSample	=	cPixel->first.next;
498 
499 			if (cPixel->first.opacity[0] >= 0 || cPixel->first.opacity[1] >= 0 || cPixel->first.opacity[2] >= 0) { 		// Pixel has no Matte
500 
501 				for (;cSample!=NULL;) {
502 					const float *opacity	= cSample->opacity;
503 
504 					// copy when we see sufficiently opaque sample, check against zthreshold
505 					if (checkZThreshold()) {
506 						NonCompositeSampleLoop()
507 							copyNonCompSamples(sampleExtra + sampleOffset);
508 						}
509 
510 						Z[0] = cSample->z;
511 						// We've found our sample quit out
512 						break;
513 					}
514 					cSample		=	cSample->next;
515 				}
516 			} else {
517 				for (;cSample!=NULL;) {
518 					const float	*color		= cSample->color;
519 					const float *opacity	= cSample->opacity;
520 
521 					int isMatte = (opacity[0] < 0 || opacity[1] < 0 || opacity[2] < 0);
522 						// Matte
523 
524 					if (checkMatteZThreshold()) {
525 						// Copy default non-composited AOVs - default values unless ignoring matte
526 						NonCompositeSampleLoop()
527 							const int matteMode		= CRenderer::nonCompChannelOrder[es*4+2];
528 							if (isMatte && matteMode) {
529 								copyNonCompSamples(CRenderer::sampleDefaults + sampleOffset);
530 							} else {
531 								copyNonCompSamples(sampleExtra + sampleOffset);
532 							}
533 						}
534 						// FIXME: respect matte mode for main display (also reset it)
535 
536 						//if (isMatte && matteMode)
537 						Z[0]	=	cSample->z;
538 						// We've found our sample, quit out
539 						break;
540 					}
541 
542 					cSample		=	cSample->next;
543 				}
544 			}
545 
546 			// Deal with no samples, and finding a second sample for midpoint
547 
548 			if (cSample == NULL) {
549 				// No samples that satisfy zthreshold, use defaults
550 				for(int es = 0; es < numExtraNonCompChannels; es++) {
551 					const int sampleOffset	= CRenderer::nonCompChannelOrder[es*4];
552 					const int numSamples	= CRenderer::nonCompChannelOrder[es*4+1];
553 					copyNonCompSamples(CRenderer::sampleDefaults + sampleOffset);
554 				}
555 				Z[0]	=	C_INFINITY;
556 				Z2[0]	=	C_INFINITY;
557 			} else if (CRenderer::depthFilter == DEPTH_MID) {
558 
559 				// Find the second sample for midpoint, if needed
560 				// Q: Why not just use zold?
561 				// A: It doesn't take account of transparent samples
562 
563 				for (;cSample!=NULL;cSample=cSample->next) {
564 					const float *opacity	= cSample->opacity;
565 
566 					if (opacity[0] < 0 || opacity[1] < 0 || opacity[2] < 0) {
567 						if (checkMatteZThreshold()) {
568 							// FIXME: respect matteMode
569 							Z2[0]	=	cSample->z;
570 							break;
571 						}
572 					} else {
573 						if (checkZThreshold()) {
574 							// FIXME: respect matteMode
575 							Z2[0]	=	cSample->z;
576 							break;
577 						}
578 					}
579 				}
580 				if (cSample == NULL) {
581 					// no second sample, use the first (we have one,
582 					// otherwise we'd be in the first case)
583 					Z2[0]	=	Z[0];
584 				}
585 				Z2[0] = max(Z2[0],cPixel->zold);
586  			}
587 
588 			#undef NonCompositeSampleLoop
589 			#undef copyNonCompSamples
590 			#undef checkZThreshold
591 			#undef checkMatteZThreshold
592 
593 			// clip-correct the depth components
594 			if (Z[0] >= CRenderer::clipMax)			Z[0]	=	C_INFINITY;
595 			if (Z2[0] >= CRenderer::clipMax)		Z2[0]	=	C_INFINITY;
596 			}
597 
598 			///////////////////////////////////////////////
599 			// Composite loop for composited aovs, rgba
600 			// Note: we also remove the samples here
601 			///////////////////////////////////////////////
602 
603 			{
604 			#define compositeSampleLoop()										\
605 				const float *sampleExtra = cSample->extraSamples;				\
606 				for(int es = 0; es < numExtraCompChannels; es++) {				\
607 					const int sampleOffset = CRenderer::compChannelOrder[es*4];
608 
609 			cSample	=	cPixel->first.next;
610 
611 			if (!(cPixel->first.opacity[0] < 0 || cPixel->first.opacity[1] < 0 || cPixel->first.opacity[2] < 0)) {
612 				// Pixel samples have no Mattes
613 
614 				// Get the base color and opacity
615 				movvv(C,cSample->color);
616 				movvv(O,cSample->opacity);
617 				ropacity[0]	=	1-O[0];
618 				ropacity[1]	=	1-O[1];
619 				ropacity[2]	=	1-O[2];
620 
621 				// If this sample has no valid samples, this will fill in the sample defaults
622 				// because pixel->last's AOVs get initialized to the defaults
623 				compositeSampleLoop()
624 					movvv(ES + sampleOffset,sampleExtra + sampleOffset);
625 				}
626 
627 				// Transparency collapse, and delete samples
628 				oSample		=	cSample;
629 				cSample		=	cSample->next;
630 				for (;cSample!=NULL;) {
631 					deleteFragment(oSample);
632 					const float	*color		= cSample->color;
633 					const float *opacity	= cSample->opacity;
634 					sampleExtra				= cSample->extraSamples;
635 
636 					// Composite
637 					C[0]		+=	ropacity[0]*color[0];
638 					C[1]		+=	ropacity[1]*color[1];
639 					C[2]		+=	ropacity[2]*color[2];
640 					O[0]		+=	ropacity[0]*opacity[0];
641 					O[1]		+=	ropacity[1]*opacity[1];
642 					O[2]		+=	ropacity[2]*opacity[2];
643 
644 					// Composite extra samples
645 					compositeSampleLoop()
646 						ES[sampleOffset + 0] += ropacity[0]*sampleExtra[sampleOffset + 0];
647 						ES[sampleOffset + 1] += ropacity[1]*sampleExtra[sampleOffset + 1];
648 						ES[sampleOffset + 2] += ropacity[2]*sampleExtra[sampleOffset + 2];
649 					}
650 
651 					ropacity[0]	*=	1-opacity[0];
652 					ropacity[1]	*=	1-opacity[1];
653 					ropacity[2]	*=	1-opacity[2];
654 
655 					oSample		=	cSample;
656 					cSample		=	cSample->next;
657 				}
658 			} else {
659 				// We have mattes in the stack
660 
661 				// Get the base color, opacity and extra samples
662 				// This will install defaults if there are no valid samples
663 				if (cSample->opacity[0] < 0 || cSample->opacity[1] < 0 || cSample->opacity[2] < 0) {
664 					// Matte base sample
665 					initv(C,0);
666 					initv(O,0);
667 					ropacity[0]	=	1+cSample->opacity[0];
668 					ropacity[1]	=	1+cSample->opacity[1];
669 					ropacity[2]	=	1+cSample->opacity[2];
670 
671 					// Composite AOVs with ignore matte flag
672 					compositeSampleLoop()
673 						const int matteMode		= CRenderer::compChannelOrder[es*4+2];
674 						if (matteMode)		initv(ES + sampleOffset,0);
675 						else				movvv(ES + sampleOffset,sampleExtra + sampleOffset);
676 					}
677 				} else {
678 					// Non-matte base sample
679 					movvv(C,cSample->color);
680 					movvv(O,cSample->opacity);
681 					ropacity[0]	=	1-O[0];
682 					ropacity[1]	=	1-O[1];
683 					ropacity[2]	=	1-O[2];
684 
685 					// Composite extra samples
686 					compositeSampleLoop()
687 						movvv(ES + sampleOffset,sampleExtra + sampleOffset);
688 					}
689 				}
690 
691 				// Transparency collapse, and delete samples
692 				oSample		=	cSample;
693 				cSample		=	cSample->next;
694 				for (;cSample!=NULL;) {
695 					deleteFragment(oSample);
696 					const float	*color		= cSample->color;
697 					const float *opacity	= cSample->opacity;
698 
699 					if (opacity[0] < 0 || opacity[1] < 0 || opacity[2] < 0) {
700 						// Composite Matte
701 						ropacity[0]	*=	1+opacity[0];
702 						ropacity[1]	*=	1+opacity[1];
703 						ropacity[2]	*=	1+opacity[2];
704 
705 						// Composite AOVs with ignore matte flag
706 						compositeSampleLoop()
707 							const int matteMode	=	CRenderer::compChannelOrder[es*4+2];
708 							if (!matteMode)		 	movvv(ES + sampleOffset,sampleExtra + sampleOffset);
709 						}
710 					}
711 					else {
712 						// Composite non-matte
713 						C[0]		+=	ropacity[0]*color[0];
714 						C[1]		+=	ropacity[1]*color[1];
715 						C[2]		+=	ropacity[2]*color[2];
716 						O[0]		+=	ropacity[0]*opacity[0];
717 						O[1]		+=	ropacity[1]*opacity[1];
718 						O[2]		+=	ropacity[2]*opacity[2];
719 
720 						compositeSampleLoop()
721 							ES[sampleOffset + 0] += ropacity[0]*sampleExtra[sampleOffset + 0];
722 							ES[sampleOffset + 1] += ropacity[1]*sampleExtra[sampleOffset + 1];
723 							ES[sampleOffset + 2] += ropacity[2]*sampleExtra[sampleOffset + 2];
724 						}
725 
726 						ropacity[0]	*=	1-opacity[0];
727 						ropacity[1]	*=	1-opacity[1];
728 						ropacity[2]	*=	1-opacity[2];
729 					}
730 
731 					oSample		=	cSample;
732 					cSample		=	cSample->next;
733 				}
734 			}
735 
736 			// Alpha is the average opacity
737 			// I know this is wrong but this is more useful
738 			cFb[0]			=	((O[0] + O[1] + O[2])*0.3333333333333333f);
739 		}
740 	}
741 	}
742 
743 	// Note: at this point, all the subpixel samples are valid
744 	// We could output subpixel samples here if we wish to support a subpixel hider
745 
746 	// Clear the memory first
747 	for (tmp=fb2,i=xres*yres*CRenderer::numSamples;i>0;i--)	*tmp++	=	0;
748 
749 	// Perform non area-filtering for depth
750 	// Note: technically, this should filter in the specified width/height
751 	// but > 1 pixel doesn't really make sense
752 	switch(CRenderer::depthFilter) {
753 	case DEPTH_MIN:
754 
755 		for (int y=0;y<yres;y++) {
756 			float			*cPixelLine		=	&fb2[y*xres*CRenderer::numSamples];
757 			float			*cPixel			=	cPixelLine;
758 			float			*cSampleLine	=	&fbs[((y*CRenderer::pixelYsamples+CRenderer::ySampleOffset)*totalWidth+CRenderer::xSampleOffset)*pixelSize];
759 			float			*cSample		=	cSampleLine;
760 
761 			for (i=0;i<xres;i++,cPixel+=CRenderer::numSamples,cSample+=CRenderer::pixelXsamples*pixelSize) {
762 				cPixel[4]		=	cSample[1];		// initialize with first sample
763 			}
764 
765 			cSample		=	cSampleLine;
766 			for (sy=0;sy<CRenderer::pixelYsamples;sy++) {
767 				cPixel = cPixelLine;
768 				for (i=0;i<xres;i++) {
769 					for (sx=0;sx<CRenderer::pixelXsamples;sx++,cSample+=pixelSize) {
770 						cPixel[4]		=	min(cPixel[4],cSample[1]);
771 					}
772 					cPixel += CRenderer::numSamples;
773 				}
774 				//cSample += CRenderer::xSampleOffset*pixelSize;
775 				cSample = cSampleLine + totalWidth*pixelSize;
776 				cSampleLine = cSample;
777 			}
778 		}
779 
780 		break;
781 	case DEPTH_MAX:
782 
783 		for (int y=0;y<yres;y++) {
784 			float			*cPixelLine		=	&fb2[y*xres*CRenderer::numSamples];
785 			float			*cPixel			=	cPixelLine;
786 			float			*cSampleLine	=	&fbs[((y*CRenderer::pixelYsamples+CRenderer::ySampleOffset)*totalWidth+CRenderer::xSampleOffset)*pixelSize];
787 			float			*cSample		=	cSampleLine;
788 
789 			for (i=0;i<xres;i++,cPixel+=CRenderer::numSamples,cSample+=CRenderer::pixelXsamples*pixelSize) {
790 				cPixel[4]		=	cSample[1];		// initialize with first sample
791 			}
792 
793 			cSample		=	cSampleLine;
794 			for (sy=0;sy<CRenderer::pixelYsamples;sy++) {
795 				cPixel = cPixelLine;
796 				for (i=0;i<xres;i++) {
797 					for (sx=0;sx<CRenderer::pixelXsamples;sx++,cSample+=pixelSize) {
798 						cPixel[4]		=	max(cPixel[4],cSample[1]);
799 					}
800 					cPixel += CRenderer::numSamples;
801 				}
802 				//cSample += CRenderer::xSampleOffset*pixelSize;
803 				cSample = cSampleLine + totalWidth*pixelSize;
804 				cSampleLine = cSample;
805 			}
806 		}
807 
808 		break;
809 	case DEPTH_AVG:
810 
811 		for (int y=0;y<yres;y++) {
812 			float			*cPixelLine		=	&fb2[y*xres*CRenderer::numSamples];
813 			float			*cPixel			=	cPixelLine;
814 			float			*cSampleLine	=	&fbs[((y*CRenderer::pixelYsamples+CRenderer::ySampleOffset)*totalWidth+CRenderer::xSampleOffset)*pixelSize];
815 			float			*cSample		=	cSampleLine;
816 
817 			for (i=0;i<xres;i++,cPixel+=CRenderer::numSamples,cSample+=CRenderer::pixelXsamples*pixelSize) {
818 				cPixel[4]		=	0;		// initialize with zero
819 			}
820 
821 			cSample		=	cSampleLine;
822 			for (sy=0;sy<CRenderer::pixelYsamples;sy++) {
823 				cPixel = cPixelLine;
824 				for (i=0;i<xres;i++) {
825 					for (sx=0;sx<CRenderer::pixelXsamples;sx++,cSample+=pixelSize) {
826 						cPixel[4]		+=	cSample[1];
827 					}
828 					cPixel += CRenderer::numSamples;
829 				}
830 				//cSample += CRenderer::xSampleOffset*pixelSize;
831 				cSample = cSampleLine + totalWidth*pixelSize;
832 				cSampleLine = cSample;
833 			}
834 		}
835 
836 		{
837 			const float normalizer = 1.0f/((float)CRenderer::pixelXsamples*(float)CRenderer::pixelYsamples);
838 			for (int y=0;y<yres;y++) {
839 				float			*cPixel		=	&fb2[y*xres*CRenderer::numSamples];
840 				for (i=0;i<xres;i++,cPixel+=CRenderer::numSamples) {
841 					cPixel[4] 	*= normalizer;
842 				}
843 			}
844 		}
845 
846 		break;
847 
848 	case DEPTH_MID:
849 		/// FIXME: for midpoint should be working out front most 2 planes (min of front, min of 2nd)
850 		// and do midpoint on the - need extra working space to do this.
851 
852 		for (int y=0;y<yres;y++) {
853 			float			*cPixelLine		=	&fb2[y*xres*CRenderer::numSamples];
854 			float			*cPixel			=	cPixelLine;
855 			float			*cSampleLine	=	&fbs[((y*CRenderer::pixelYsamples+CRenderer::ySampleOffset)*totalWidth+CRenderer::xSampleOffset)*pixelSize];
856 			float			*cSample		=	cSampleLine;
857 
858 			for (i=0;i<xres;i++,cPixel+=CRenderer::numSamples,cSample+=CRenderer::pixelXsamples*pixelSize) {
859 				cPixel[4]		=	0;		// initialize with zero
860 			}
861 
862 			cSample		=	cSampleLine;
863 			for (sy=0;sy<CRenderer::pixelYsamples;sy++) {
864 				cPixel = cPixelLine;
865 				for (i=0;i<xres;i++) {
866 					for (sx=0;sx<CRenderer::pixelXsamples;sx++,cSample+=pixelSize) {
867 						cPixel[4]		+=	0.5f*(cSample[1] + cSample[5]);		// FIXME: this is wrong, we're doing average on midpoint values
868 					}
869 					cPixel += CRenderer::numSamples;
870 				}
871 				//cSample += CRenderer::xSampleOffset*pixelSize;
872 				cSample = cSampleLine + totalWidth*pixelSize;
873 				cSampleLine = cSample;
874 			}
875 		}
876 
877 		{
878 			const float normalizer = 1.0f/((float)CRenderer::pixelXsamples*(float)CRenderer::pixelYsamples);
879 			for (int y=0;y<yres;y++) {
880 				float			*cPixel		=	&fb2[y*xres*CRenderer::numSamples];
881 				for (i=0;i<xres;i++,cPixel+=CRenderer::numSamples) {
882 					cPixel[4] 	*= normalizer;
883 				}
884 			}
885 		}
886 	}
887 
888 
889 	// FIXME: Filter non-composited samples
890 	if (numExtraNonCompChannels > 0) {
891 
892 	}
893 
894 	// Filter the samples
895 	for (int y=0;y<yres;y++) {
896 		for (sy=0;sy<filterHeight;sy++) {
897 			for (sx=0;sx<filterWidth;sx++) {
898 				float			*pixelLine		=	&fb2[y*xres*CRenderer::numSamples];
899 				const float		*sampleLine		=	&fbs[((y*CRenderer::pixelYsamples+sy)*totalWidth + sx)*pixelSize];
900 				const float		xOffset			=	sx - halfFilterWidth;
901 				const float		yOffset			=	sy - halfFilterHeight;
902 				const float		filterResponse	=	CRenderer::pixelFilterKernel[sy*filterWidth + sx];
903 
904 				for (i=0;i<xres;i++) {
905 					int			es;
906 
907 					pixelLine[0]				+=	filterResponse*sampleLine[2];
908 					pixelLine[1]				+=	filterResponse*sampleLine[3];
909 					pixelLine[2]				+=	filterResponse*sampleLine[4];
910 					pixelLine[3]				+=	filterResponse*sampleLine[0];
911 
912 					// Filter the extra samples here
913 					for (es=0;es<CRenderer::numExtraSamples;es++) {
914 						pixelLine[5+es]			+=	filterResponse*sampleLine[6+es];
915 					}
916 
917 					// Advance
918 					pixelLine					+=	CRenderer::numSamples;
919 					sampleLine					+=	sampleLineDisplacement;
920 				}
921 			}
922 		}
923 	}
924 
925 	memEnd(threadMemory);
926 }
927 
928 
929 // A transient data structure to hold TSM data
930 class	CTSMData {
931 public:
932 	float	origin[4];
933 	float	lastZ;
934 	float	rSlopeMin;
935 	float	gSlopeMin;
936 	float	bSlopeMin;
937 	float	rSlopeMax;
938 	float	gSlopeMax;
939 	float	bSlopeMax;
940 	FILE	*deepShadowFile;
941 	float	tsmThreshold;
942 };
943 
944 ///////////////////////////////////////////////////////////////////////
945 // Function				:	outSample
946 // Description			:	This function is used to output a depth sample
947 // Return Value			:	-
948 // Comments				:
949 inline	void	startSample(FILE *outFile,float threshold,CTSMData &data) {
950 	data.deepShadowFile		=	outFile;
951 	data.tsmThreshold		=	threshold;
952 
953 	data.rSlopeMax		=	C_INFINITY;
954 	data.gSlopeMax		=	C_INFINITY;
955 	data.bSlopeMax		=	C_INFINITY;
956 	data.rSlopeMin		=	-C_INFINITY;
957 	data.gSlopeMin		=	-C_INFINITY;
958 	data.bSlopeMin		=	-C_INFINITY;
959 
960 	// Output the first sample (at z=-C_INFINITY)
961 	data.origin[0]		=	-C_INFINITY;
962 	data.origin[1]		=	1;
963 	data.origin[2]		=	1;
964 	data.origin[3]		=	1;
965 	fwrite(data.origin,sizeof(float),4,data.deepShadowFile);
966 	data.lastZ			=	-C_INFINITY;
967 }
968 
969 ///////////////////////////////////////////////////////////////////////
970 // Function				:	outSample
971 // Description			:	This function is used to output a depth sample
972 // Return Value			:	-
973 // Comments				:
974 inline	void	outSample(float cZ,const float *opacity,CTSMData &data) {
975 	// Always output the closest sample
976 	if (data.origin[0] == -C_INFINITY) {
977 		data.origin[0]	=	cZ;
978 		data.origin[1]	=	opacity[0];
979 		data.origin[2]	=	opacity[1];
980 		data.origin[3]	=	opacity[2];
981 
982 		fwrite(data.origin,sizeof(float),4,data.deepShadowFile);
983 	} else if (cZ == data.origin[0]) {	// Do we have a step ?
984 		const float	dr	=	absf(data.origin[1] - opacity[0]);
985 		const float	dg	=	absf(data.origin[2] - opacity[1]);
986 		const float	db	=	absf(data.origin[3] - opacity[2]);
987 
988 		// Is the step small enough ?
989 		if ((dr >= data.tsmThreshold) || (dg >= data.tsmThreshold) || (db >= data.tsmThreshold)) {
990 
991 			// No, output the step
992 			data.origin[1]	=	opacity[0];
993 			data.origin[2]	=	opacity[1];
994 			data.origin[3]	=	opacity[2];
995 			fwrite(data.origin,sizeof(float),4,data.deepShadowFile);
996 		}
997 	} else {
998 		// Check for the window of validity
999 		const float	denom		=	1 / (cZ - data.origin[0]);
1000 		float		crSlopeMax	=	(opacity[0] - data.origin[1] + data.tsmThreshold) * denom;
1001 		float		cgSlopeMax	=	(opacity[1] - data.origin[2] + data.tsmThreshold) * denom;
1002 		float		cbSlopeMax	=	(opacity[2] - data.origin[3] + data.tsmThreshold) * denom;
1003 		float		crSlopeMin	=	(opacity[0] - data.origin[1] - data.tsmThreshold) * denom;
1004 		float		cgSlopeMin	=	(opacity[1] - data.origin[2] - data.tsmThreshold) * denom;
1005 		float		cbSlopeMin	=	(opacity[2] - data.origin[3] - data.tsmThreshold) * denom;
1006 
1007 		crSlopeMax				=	min(crSlopeMax,data.rSlopeMax);
1008 		cgSlopeMax				=	min(cgSlopeMax,data.gSlopeMax);
1009 		cbSlopeMax				=	min(cbSlopeMax,data.bSlopeMax);
1010 
1011 		crSlopeMin				=	max(crSlopeMin,data.rSlopeMin);
1012 		cgSlopeMin				=	max(cgSlopeMin,data.gSlopeMin);
1013 		cbSlopeMin				=	max(cbSlopeMin,data.bSlopeMin);
1014 
1015 		if ((crSlopeMin < crSlopeMax) && (cgSlopeMin < cgSlopeMax) && (cbSlopeMin < cbSlopeMax)) {
1016 			// We're in range
1017 			data.rSlopeMax		=	crSlopeMax;
1018 			data.gSlopeMax		=	cgSlopeMax;
1019 			data.bSlopeMax		=	cbSlopeMax;
1020 
1021 			data.rSlopeMin		=	crSlopeMin;
1022 			data.gSlopeMin		=	cgSlopeMin;
1023 			data.bSlopeMin		=	cbSlopeMin;
1024 		} else {
1025 			data.origin[1]		+=	(data.rSlopeMin + data.rSlopeMax)*(data.lastZ - data.origin[0])*0.5f;
1026 			data.origin[2]		+=	(data.gSlopeMin + data.gSlopeMax)*(data.lastZ - data.origin[0])*0.5f;
1027 			data.origin[3]		+=	(data.bSlopeMin + data.bSlopeMax)*(data.lastZ - data.origin[0])*0.5f;
1028 			data.origin[0]		=	data.lastZ;
1029 			fwrite(data.origin,sizeof(float),4,data.deepShadowFile);
1030 
1031 			data.rSlopeMax		=	C_INFINITY;
1032 			data.gSlopeMax		=	C_INFINITY;
1033 			data.bSlopeMax		=	C_INFINITY;
1034 			data.rSlopeMin		=	-C_INFINITY;
1035 			data.gSlopeMin		=	-C_INFINITY;
1036 			data.bSlopeMin		=	-C_INFINITY;
1037 
1038 			// Do we have a step ?
1039 			if (cZ == data.origin[0]) {
1040 				const float	dr	=	absf(data.origin[1] - opacity[0]);
1041 				const float	dg	=	absf(data.origin[2] - opacity[1]);
1042 				const float	db	=	absf(data.origin[3] - opacity[2]);
1043 
1044 				// Is the step small enough ?
1045 				if ((dr >= data.tsmThreshold) || (dg >= data.tsmThreshold) || (db >= data.tsmThreshold)) {
1046 
1047 					// No, output the step
1048 					data.origin[1]	=	opacity[0];
1049 					data.origin[2]	=	opacity[1];
1050 					data.origin[3]	=	opacity[2];
1051 					fwrite(data.origin,sizeof(float),4,data.deepShadowFile);
1052 				}
1053 			} else {
1054 				const float	denom		=	1 / (cZ - data.origin[0]);
1055 				data.rSlopeMax	=	(opacity[0] - data.origin[1] + data.tsmThreshold) * denom;
1056 				data.gSlopeMax	=	(opacity[1] - data.origin[2] + data.tsmThreshold) * denom;
1057 				data.bSlopeMax	=	(opacity[2] - data.origin[3] + data.tsmThreshold) * denom;
1058 				data.rSlopeMin	=	(opacity[0] - data.origin[1] - data.tsmThreshold) * denom;
1059 				data.gSlopeMin	=	(opacity[1] - data.origin[2] - data.tsmThreshold) * denom;
1060 				data.bSlopeMin	=	(opacity[2] - data.origin[3] - data.tsmThreshold) * denom;
1061 			}
1062 		}
1063 	}
1064 
1065 	data.lastZ	=	cZ;
1066 }
1067 
1068 ///////////////////////////////////////////////////////////////////////
1069 // Function				:	finishSample
1070 // Description			:	This function is used to output the last sample
1071 // Return Value			:	-
1072 // Comments				:
1073 inline	void	finishSample(float cZ,const float *opacity,CTSMData &data) {
1074 	if (data.origin[0] < cZ) {
1075 		data.origin[1]		+=	(data.rSlopeMin + data.rSlopeMax)*(data.lastZ - data.origin[0])*0.5f;
1076 		data.origin[2]		+=	(data.gSlopeMin + data.gSlopeMax)*(data.lastZ - data.origin[0])*0.5f;
1077 		data.origin[3]		+=	(data.bSlopeMin + data.bSlopeMax)*(data.lastZ - data.origin[0])*0.5f;
1078 		data.origin[0]		=	data.lastZ;
1079 		fwrite(data.origin,sizeof(float),4,data.deepShadowFile);
1080 	}
1081 
1082 	data.origin[0]		=	cZ;
1083 	data.origin[1]		=	opacity[0];
1084 	data.origin[2]		=	opacity[1];
1085 	data.origin[3]		=	opacity[2];
1086 	fwrite(data.origin,sizeof(float),4,data.deepShadowFile);
1087 
1088 	data.origin[0]		=	C_INFINITY;
1089 	fwrite(data.origin,sizeof(float),4,data.deepShadowFile);
1090 }
1091 
1092 ///////////////////////////////////////////////////////////////////////
1093 // Class				:	CStochastic
1094 // Method				:	filterSamples
1095 // Description			:	Filter / output the pixel
1096 // Return Value			:	-
1097 // Comments				:
1098 void			CStochastic::filterSamples(int numSamples,CFragment **samples,float *weights) {
1099 	int			minSample		=	0;
1100 	int			i;
1101 	vector		opacity;
1102 	CTSMData	data;
1103 
1104 	initv(opacity,1,1,1);		// The current opacity
1105 
1106 	startSample(CRenderer::deepShadowFile,CRenderer::tsmThreshold,data);				// The beginning of a pixel
1107 
1108 	// Find the closest sample
1109 	for (i=1;i<numSamples;i++) {
1110 		if (samples[i]->z < samples[minSample]->z) {
1111 			minSample	=	i;
1112 		}
1113 	}
1114 
1115 	// Filter / output pixels incrementally
1116 	while(TRUE) {
1117 		int				stop		=	FALSE;							// TRUE when the opacity drops below 0
1118 		const CFragment	*cSample	=	samples[minSample];
1119 		const float		cZ			=	cSample->z;						// The current Z coordinate
1120 		float			*oldOpacity	=	weights + (minSample<<2);
1121 		vector			newOpacity;
1122 
1123 		outSample(cZ,opacity,data);
1124 
1125 		newOpacity[0]				=	oldOpacity[1]*(1-cSample->opacity[0]);
1126 		newOpacity[1]				=	oldOpacity[2]*(1-cSample->opacity[1]);
1127 		newOpacity[2]				=	oldOpacity[3]*(1-cSample->opacity[2]);
1128 
1129 		opacity[0]					+=	(newOpacity[0] - oldOpacity[1])*oldOpacity[0];
1130 		opacity[1]					+=	(newOpacity[1] - oldOpacity[2])*oldOpacity[0];
1131 		opacity[2]					+=	(newOpacity[2] - oldOpacity[3])*oldOpacity[0];
1132 
1133 		oldOpacity[1]				=	newOpacity[0];
1134 		oldOpacity[2]				=	newOpacity[1];
1135 		oldOpacity[3]				=	newOpacity[2];
1136 
1137 		if (opacity[0] <= 0) {
1138 			opacity[0]	=	0;
1139 			stop++;
1140 		}
1141 
1142 		if (opacity[1] <= 0) {
1143 			opacity[1]	=	0;
1144 			stop++;
1145 		}
1146 
1147 		if (opacity[2] <= 0) {
1148 			opacity[2]	=	0;
1149 			stop++;
1150 		}
1151 
1152 		// Advance the minSample
1153 		if ((samples[minSample]		=	samples[minSample]->next) == NULL) {
1154 			int	nindex				=	minSample << 2;
1155 			int	oindex				=	(numSamples-1) << 2;
1156 			samples[minSample]		=	samples[numSamples-1];
1157 			weights[nindex + 0]		=	weights[oindex + 0];
1158 			weights[nindex + 1]		=	weights[oindex + 1];
1159 			weights[nindex + 2]		=	weights[oindex + 2];
1160 			weights[nindex + 3]		=	weights[oindex + 3];
1161 			numSamples--;
1162 			if (numSamples == 0) {
1163 				stop	=	3;
1164 			}
1165 		}
1166 
1167 		// Decide whether we should stop or keep going
1168 		if (stop == 3) {
1169 			finishSample(cZ,opacity,data);
1170 			break;
1171 		} else {
1172 			outSample(cZ,opacity,data);
1173 		}
1174 
1175 		for (minSample=0,i=1;i<numSamples;i++) {
1176 			if (samples[i]->z < samples[minSample]->z) {
1177 				minSample	=	i;
1178 			}
1179 		}
1180 	}
1181 }
1182 
1183 ///////////////////////////////////////////////////////////////////////
1184 // Class				:	CStochastic
1185 // Method				:	deepShadowCompute
1186 // Description			:	Compute/write deep shadow map data
1187 // Return Value			:	-
1188 // Comments				:
1189 void		CStochastic::deepShadowCompute() {
1190 	int			i;
1191 	const int	xres				=	width;
1192 	const int	yres				=	height;
1193 	const int	filterWidth			=	CRenderer::pixelXsamples + 2*CRenderer::xSampleOffset;
1194 	const int	filterHeight		=	CRenderer::pixelYsamples + 2*CRenderer::ySampleOffset;
1195 	const float	invPixelXsamples	=	1 / (float) CRenderer::pixelXsamples;
1196 	const float	invPixelYsamples	=	1 / (float) CRenderer::pixelYsamples;
1197 	int			prevFilePos;
1198 	int			numSamples;
1199 	int			x,y;
1200 	CFragment	**samples;
1201 	CFragment	**fSamples;
1202 	float		*fWeights;
1203 
1204 	osLock(CRenderer::deepShadowMutex);
1205 
1206 	memBegin(threadMemory);
1207 
1208 	prevFilePos		=	ftell(CRenderer::deepShadowFile);
1209 
1210 	// Allocate the memory for misc junk
1211 	samples			=	(CFragment **)	ralloc(totalHeight*totalWidth*sizeof(CFragment*),threadMemory);
1212 	fSamples		=	(CFragment **)	ralloc(filterWidth*filterHeight*sizeof(CFragment*),threadMemory);
1213 	fWeights		=	(float *)		ralloc(filterWidth*filterHeight*sizeof(float)*4,threadMemory);
1214 
1215 	// Init the samples
1216 	for (i=0;i<totalWidth*totalHeight;i++)	samples[i]	=	NULL;
1217 
1218 	// Collect the samples first
1219 	for (i=0,y=0;y<sampleHeight;y++) {
1220 		for (x=0;x<sampleWidth;x++,i++) {
1221 			samples[i]	=	fb[y][x].first.next;
1222 		}
1223 	}
1224 
1225 	assert(CRenderer::bucketWidth	>= xres);
1226 	assert(CRenderer::bucketHeight	>= yres);
1227 
1228 	// Compute the visibility function for each pixel
1229 	for (y=0;y<CRenderer::bucketHeight;y++) {
1230 		for (x=0;x<CRenderer::bucketWidth;x++) {
1231 
1232 			if ((x < xres) && (y < yres)) {
1233 				// Gather the samples for this pixel and the filter response
1234 				int		sx,sy;
1235 				float	filterSum		=	0;
1236 
1237 				for (i=0,sy=0;sy<filterHeight;sy++) {
1238 					for (sx=0;sx<filterWidth;sx++,i++) {
1239 						const int		xsample	=	x*CRenderer::pixelXsamples + sx;
1240 						const int		ysample	=	y*CRenderer::pixelYsamples + sy;
1241 						const CPixel	*pixels	=	&fb[ysample][xsample];
1242 						const float		cx		=	(sx + pixels->jx - filterWidth*0.5f*invPixelXsamples);
1243 						const float		cy		=	(sy + pixels->jy - filterHeight*0.5f*invPixelYsamples);
1244 						fSamples[i]				=	samples[ysample*sampleWidth+xsample];
1245 						fWeights[i<<2]			=	CRenderer::pixelFilter(cx,cy,CRenderer::pixelFilterWidth,CRenderer::pixelFilterHeight);
1246 						filterSum				+=	fWeights[i<<2];
1247 					}
1248 				}
1249 
1250 				// Normalize the pixel filter responses
1251 				numSamples	=	i;
1252 				for (i=0;i<numSamples;i++) {
1253 					const int	index	=	i << 2;
1254 					fWeights[index+0]			/=	filterSum;
1255 					fWeights[index+1]			=	1;
1256 					fWeights[index+2]			=	1;
1257 					fWeights[index+3]			=	1;
1258 				}
1259 
1260 				// Filter/write the pixels
1261 				filterSamples(numSamples,fSamples,fWeights);
1262 			} else {
1263 				// Output a dummy pixel
1264 				float	dummy[4];
1265 
1266 				dummy[0]	=	-C_INFINITY;
1267 				dummy[1]	=	1;
1268 				dummy[2]	=	1;
1269 				dummy[3]	=	1;
1270 				fwrite(dummy,sizeof(float),4,CRenderer::deepShadowFile);
1271 
1272 				dummy[0]	=	C_INFINITY;
1273 				dummy[1]	=	1;
1274 				dummy[2]	=	1;
1275 				dummy[3]	=	1;
1276 				fwrite(dummy,sizeof(float),4,CRenderer::deepShadowFile);
1277 			}
1278 		}
1279 	}
1280 
1281 	// Record the index in the file
1282 	//	we now save sizes too in order to support arbitrary bucket orders
1283 	//	indices are now bucket starts
1284 	const int tileIndex = currentYBucket*CRenderer::xBuckets + currentXBucket;
1285 	CRenderer::deepShadowIndex[tileIndex]											=	prevFilePos;
1286 	CRenderer::deepShadowIndex[tileIndex + CRenderer::xBuckets*CRenderer::yBuckets]	=	ftell(CRenderer::deepShadowFile) - prevFilePos;
1287 
1288 	memEnd(threadMemory);
1289 
1290 	osUnlock(CRenderer::deepShadowMutex);
1291 }
1292 
1293