1 /*******************************************************************************
2  * texture.cpp
3  *
4  * This module implements texturing functions such as noise, turbulence and
5  * texture transformation functions. The actual texture routines are in the
6  * files pigment.c & normal.c.
7  * The noise function used here is the one described by Ken Perlin in
8  * "Hypertexture", SIGGRAPH '89 Conference Proceedings page 253.
9  *
10  * ---------------------------------------------------------------------------
11  * Persistence of Vision Ray Tracer ('POV-Ray') version 3.7.
12  * Copyright 1991-2013 Persistence of Vision Raytracer Pty. Ltd.
13  *
14  * POV-Ray is free software: you can redistribute it and/or modify
15  * it under the terms of the GNU Affero General Public License as
16  * published by the Free Software Foundation, either version 3 of the
17  * License, or (at your option) any later version.
18  *
19  * POV-Ray is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22  * GNU Affero General Public License for more details.
23  *
24  * You should have received a copy of the GNU Affero General Public License
25  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
26  * ---------------------------------------------------------------------------
27  * POV-Ray is based on the popular DKB raytracer version 2.12.
28  * DKBTrace was originally written by David K. Buck.
29  * DKBTrace Ver 2.0-2.12 were written by David K. Buck & Aaron A. Collins.
30  * ---------------------------------------------------------------------------
31  * $File: //depot/public/povray/3.x/source/backend/texture/texture.cpp $
32  * $Revision: #1 $
33  * $Change: 6069 $
34  * $DateTime: 2013/11/06 11:59:40 $
35  * $Author: chrisc $
36  *******************************************************************************/
37 
38 /*
39    Some texture ideas garnered from SIGGRAPH '85 Volume 19 Number 3,
40    "An Image Synthesizer" By Ken Perlin.
41    Further Ideas Garnered from "The RenderMan Companion" (Addison Wesley)
42 */
43 
44 // frame.h must always be the first POV file included (pulls in platform config)
45 #include "backend/frame.h"
46 #include "backend/texture/texture.h"
47 #include "backend/texture/pigment.h"
48 #include "backend/texture/normal.h"
49 #include "backend/support/imageutil.h"
50 #include "backend/math/vector.h"
51 #include "base/pov_err.h"
52 
53 #if defined(USE_AVX_FMA4_FOR_NOISE)
54 	#include "avxfma4check.h"
55 #endif
56 
57 #include <algorithm>
58 
59 // this must be the last file included
60 #include "base/povdebug.h"
61 
62 namespace pov
63 {
64 
65 // TODO FIXME GLOBAL VARIABLE
66 static unsigned int next_rand = 1;
67 
68 /*****************************************************************************
69 * Static functions
70 ******************************************************************************/
71 
72 static void InitTextureTable (void);
73 static TEXTURE *Copy_Materials (TEXTURE *Old);
74 static void InitSolidNoise(void);
75 static DBL SolidNoise(const VECTOR P);
76 
77 /*****************************************************************************
78 * Local preprocessor defines
79 ******************************************************************************/
80 
81 /* Ridiculously large scaling values */
82 
83 const int MINX = -10000;
84 const int MINY = MINX;
85 const int MINZ = MINX;
86 
87 const int SINTABSIZE = 1000;
88 
89 #define SCURVE(a) ((a)*(a)*(3.0-2.0*(a)))
90 
91 // Hash2d assumed values in the range 0..8191
92 #define Hash2d(a,b)   \
93 	hashTable[(int)(hashTable[(int)(a)] ^ (b))]
94 
95 // Hash1dRTableIndex assumed values in the range 0..8191
96 #define Hash1dRTableIndex(a,b)   \
97 	((hashTable[(int)(a) ^ (b)] & 0xFF) * 2)
98 
99 #define INCRSUM(m,s,x,y,z)  \
100 	((s)*(RTable[m+1] + RTable[m+2]*(x) + RTable[m+4]*(y) + RTable[m+6]*(z)))
101 
102 #define INCRSUMP(mp,s,x,y,z) \
103 	((s)*((mp[1]) + (mp[2])*(x) + (mp[4])*(y) + (mp[6])*(z)))
104 
105 
106 /*****************************************************************************
107 * Local typedefs
108 ******************************************************************************/
109 
110 
111 
112 /*****************************************************************************
113 * Local variables
114 ******************************************************************************/
115 
116 static DBL *sintab; // GLOBAL VARIABLE
117 
118 #ifdef DYNAMIC_HASHTABLE
119 unsigned short *hashTable; // GLOBAL VARIABLE
120 #else
121 ALIGN16 unsigned short hashTable[8192]; // GLOBAL VARIABLE
122 #endif
123 
124 /*****************************************************************************
125 * Local variables
126 ******************************************************************************/
127 
128 ALIGN16 DBL RTable[267*2] =
129 {
130 	        -1, 0.0,    0.604974, 0.0,   -0.937102, 0.0,    0.414115, 0.0,    0.576226, 0.0,  -0.0161593, 0.0,
131 	  0.432334, 0.0,    0.103685, 0.0,    0.590539, 0.0,   0.0286412, 0.0,     0.46981, 0.0,    -0.84622, 0.0,
132 	-0.0734112, 0.0,   -0.304097, 0.0,    -0.40206, 0.0,   -0.210132, 0.0,   -0.919127, 0.0,    0.652033, 0.0,
133 	  -0.83151, 0.0,   -0.183948, 0.0,   -0.671107, 0.0,    0.852476, 0.0,    0.043595, 0.0,   -0.404532, 0.0,
134 	   0.75494, 0.0,   -0.335653, 0.0,    0.618433, 0.0,    0.605707, 0.0,    0.708583, 0.0,   -0.477195, 0.0,
135 	  0.899474, 0.0,    0.490623, 0.0,    0.221729, 0.0,   -0.400381, 0.0,   -0.853727, 0.0,   -0.932586, 0.0,
136 	  0.659113, 0.0,    0.961303, 0.0,    0.325948, 0.0,   -0.750851, 0.0,    0.842466, 0.0,    0.734401, 0.0,
137 	 -0.649866, 0.0,    0.394491, 0.0,   -0.466056, 0.0,   -0.434073, 0.0,    0.109026, 0.0,   0.0847028, 0.0,
138 	 -0.738857, 0.0,    0.241505, 0.0,     0.16228, 0.0,    -0.71426, 0.0,   -0.883665, 0.0,   -0.150408, 0.0,
139 	  -0.90396, 0.0,   -0.686549, 0.0,   -0.785214, 0.0,    0.488548, 0.0,   0.0246433, 0.0,    0.142473, 0.0,
140 	 -0.602136, 0.0,    0.375845, 0.0, -0.00779736, 0.0,    0.498955, 0.0,   -0.268147, 0.0,    0.856382, 0.0,
141 	 -0.386007, 0.0,   -0.596094, 0.0,   -0.867735, 0.0,   -0.570977, 0.0,   -0.914366, 0.0,     0.28896, 0.0,
142 	  0.672206, 0.0,   -0.233783, 0.0,     0.94815, 0.0,    0.895262, 0.0,    0.343252, 0.0,   -0.173388, 0.0,
143 	 -0.767971, 0.0,   -0.314748, 0.0,    0.824308, 0.0,   -0.342092, 0.0,    0.721431, 0.0,    -0.24004, 0.0,
144 	  -0.63653, 0.0,    0.553277, 0.0,    0.376272, 0.0,    0.158984, 0.0,   -0.452659, 0.0,    0.396323, 0.0,
145 	 -0.420676, 0.0,   -0.454154, 0.0,    0.122179, 0.0,    0.295857, 0.0,   0.0664225, 0.0,   -0.202075, 0.0,
146 	 -0.724788, 0.0,    0.453513, 0.0,    0.224567, 0.0,   -0.908812, 0.0,    0.176349, 0.0,   -0.320516, 0.0,
147 	 -0.697139, 0.0,    0.742702, 0.0,   -0.900786, 0.0,    0.471489, 0.0,   -0.133532, 0.0,    0.119127, 0.0,
148 	 -0.889769, 0.0,    -0.23183, 0.0,   -0.669673, 0.0,   -0.046891, 0.0,   -0.803433, 0.0,   -0.966735, 0.0,
149 	  0.475578, 0.0,   -0.652644, 0.0,   0.0112459, 0.0,   -0.730007, 0.0,    0.128283, 0.0,    0.145647, 0.0,
150 	 -0.619318, 0.0,    0.272023, 0.0,    0.392966, 0.0,    0.646418, 0.0,  -0.0207675, 0.0,   -0.315908, 0.0,
151 	  0.480797, 0.0,    0.535668, 0.0,   -0.250172, 0.0,    -0.83093, 0.0,   -0.653773, 0.0,   -0.443809, 0.0,
152 	  0.119982, 0.0,   -0.897642, 0.0,     0.89453, 0.0,    0.165789, 0.0,    0.633875, 0.0,   -0.886839, 0.0,
153 	  0.930877, 0.0,   -0.537194, 0.0,    0.587732, 0.0,    0.722011, 0.0,   -0.209461, 0.0,  -0.0424659, 0.0,
154 	 -0.814267, 0.0,   -0.919432, 0.0,    0.280262, 0.0,    -0.66302, 0.0,   -0.558099, 0.0,   -0.537469, 0.0,
155 	 -0.598779, 0.0,    0.929656, 0.0,   -0.170794, 0.0,   -0.537163, 0.0,    0.312581, 0.0,    0.959442, 0.0,
156 	  0.722652, 0.0,    0.499931, 0.0,    0.175616, 0.0,   -0.534874, 0.0,   -0.685115, 0.0,    0.444999, 0.0,
157 	   0.17171, 0.0,    0.108202, 0.0,   -0.768704, 0.0,   -0.463828, 0.0,    0.254231, 0.0,    0.546014, 0.0,
158 	  0.869474, 0.0,    0.875212, 0.0,   -0.944427, 0.0,    0.130724, 0.0,   -0.110185, 0.0,    0.312184, 0.0,
159 	  -0.33138, 0.0,   -0.629206, 0.0,   0.0606546, 0.0,    0.722866, 0.0,  -0.0979477, 0.0,    0.821561, 0.0,
160 	 0.0931258, 0.0,   -0.972808, 0.0,   0.0318151, 0.0,   -0.867033, 0.0,   -0.387228, 0.0,    0.280995, 0.0,
161 	 -0.218189, 0.0,   -0.539178, 0.0,   -0.427359, 0.0,   -0.602075, 0.0,    0.311971, 0.0,    0.277974, 0.0,
162 	  0.773159, 0.0,    0.592493, 0.0,  -0.0331884, 0.0,   -0.630854, 0.0,   -0.269947, 0.0,    0.339132, 0.0,
163 	  0.581079, 0.0,    0.209461, 0.0,   -0.317433, 0.0,   -0.284993, 0.0,    0.181323, 0.0,    0.341634, 0.0,
164 	  0.804959, 0.0,   -0.229572, 0.0,   -0.758907, 0.0,   -0.336721, 0.0,    0.605463, 0.0,   -0.991272, 0.0,
165 	-0.0188754, 0.0,   -0.300191, 0.0,    0.368307, 0.0,   -0.176135, 0.0,     -0.3832, 0.0,   -0.749569, 0.0,
166 	   0.62356, 0.0,   -0.573938, 0.0,    0.278309, 0.0,   -0.971313, 0.0,    0.839994, 0.0,   -0.830686, 0.0,
167 	  0.439078, 0.0,     0.66128, 0.0,    0.694514, 0.0,   0.0565042, 0.0,     0.54342, 0.0,   -0.438804, 0.0,
168 	-0.0228428, 0.0,   -0.687068, 0.0,    0.857267, 0.0,    0.301991, 0.0,   -0.494255, 0.0,   -0.941039, 0.0,
169 	  0.775509, 0.0,    0.410575, 0.0,   -0.362081, 0.0,   -0.671534, 0.0,   -0.348379, 0.0,    0.932433, 0.0,
170 	  0.886442, 0.0,    0.868681, 0.0,   -0.225666, 0.0,   -0.062211, 0.0,  -0.0976425, 0.0,   -0.641444, 0.0,
171 	 -0.848112, 0.0,    0.724697, 0.0,    0.473503, 0.0,    0.998749, 0.0,    0.174701, 0.0,    0.559625, 0.0,
172 	 -0.029099, 0.0,   -0.337392, 0.0,   -0.958129, 0.0,   -0.659785, 0.0,    0.236042, 0.0,   -0.246937, 0.0,
173 	  0.659449, 0.0,   -0.027512, 0.0,    0.821897, 0.0,   -0.226215, 0.0,   0.0181735, 0.0,    0.500481, 0.0,
174 	 -0.420127, 0.0,   -0.427878, 0.0,    0.566186, 0.0
175 };
176 
177 /*****************************************************************************/
178 /* Platform specific faster noise functions support                          */
179 /* (Profiling revealed that the noise functions can take up to 50% of        */
180 /*  all the time required when rendering and current compilers cannot        */
181 /*  easily optimise them efficiently without some help from programmers!)    */
182 /*****************************************************************************/
183 
184 #if USE_FASTER_NOISE
185 }
186 #include "fasternoise.h"
187 namespace pov
188 {
189 	#ifndef FASTER_NOISE_INIT
190 		#define FASTER_NOISE_INIT()
191 	#endif
192 #else
193 #if !defined(USE_AVX_FMA4_FOR_NOISE)
194 	#define OriNoise Noise
195 	#define OriDNoise DNoise
196 #endif
197 
198 	#define FASTER_NOISE_INIT()
199 #endif
200 
201 /*****************************************************************************
202 *
203 * FUNCTION
204 *
205 *   Initialize_Noise()
206 *
207 * INPUT
208 *
209 * OUTPUT
210 *
211 * RETURNS
212 *
213 * AUTHOR
214 *
215 *   POV-Ray Team
216 *
217 * DESCRIPTION
218 *
219 * CHANGES
220 *
221 ******************************************************************************/
222 
Initialize_Noise()223 void Initialize_Noise()
224 {
225 #if defined(USE_AVX_FMA4_FOR_NOISE)
226 	Initialise_NoiseDispatch();
227 #endif
228 	InitTextureTable();
229 
230 	/* are - initialize Perlin style noise function */
231 	InitSolidNoise();
232 
233 	sintab = reinterpret_cast<DBL *>(POV_MALLOC(SINTABSIZE * sizeof(DBL), "sine table"));
234 
235 	for(int i = 0 ; i < 267 ; i++)
236 		RTable[(i * 2) + 1] = RTable[i * 2] * 0.5;
237 	//    Debug_Info("%.10f  %.10f\n", (DBL)(RTable[i * 2] - (DBL)((float)(RTable[i * 2]))), (DBL)(RTable[(i * 2) + 1] - (DBL)((float)(RTable[(i * 2) + 1]))));
238 
239 	for(int i = 0 ; i < SINTABSIZE ; i++)
240 		sintab[i] = sin((DBL)i / SINTABSIZE * TWO_M_PI);
241 }
242 
Initialize_Waves(vector<double> & waveFrequencies,vector<Vector3d> & waveSources,unsigned int numberOfWaves)243 void Initialize_Waves(vector<double>& waveFrequencies, vector<Vector3d>& waveSources, unsigned int numberOfWaves)
244 {
245 	Vector3d point;
246 
247 	waveFrequencies.clear();
248 	waveSources.clear();
249 
250 	for(int i = 0, next_rand = -560851967; i < numberOfWaves ; i++)
251 	{
252 		point = Vector3d((double)i,0.0,0.0);
253 		DNoise(*point, *point);
254 		waveSources.push_back(point.normalized());
255 
256 		next_rand = next_rand * 1812433253L + 12345L;
257 		waveFrequencies.push_back((double((int)(next_rand >> 16) & 0x7FFF) * 0.000030518509476) + 0.01);
258 	}
259 }
260 
261 
262 
263 /*****************************************************************************
264 *
265 * FUNCTION
266 *
267 *   InitTextureTable()
268 *
269 * INPUT
270 *
271 * OUTPUT
272 *
273 * RETURNS
274 *
275 * AUTHOR
276 *
277 *   POV-Ray Team
278 *
279 * DESCRIPTION
280 *
281 * CHANGES
282 *
283 ******************************************************************************/
284 
InitTextureTable()285 static void InitTextureTable()
286 {
287 	unsigned short j, temp;
288 	int i;
289 	int next_rand = 0;
290 
291 	#ifdef DYNAMIC_HASHTABLE
292 		hashTable = reinterpret_cast<unsigned short *>(POV_MALLOC(8192*sizeof(unsigned short), "hash table"));
293 	#endif
294 
295 	for(i = 0; i < 4096; i++)
296 		hashTable[i] = i;
297 
298 	for(i = 4095; i >= 0; i--)
299 	{
300 		next_rand = next_rand * 1812433253L + 12345L;
301 		j = ((int)(next_rand >> 16) & 0x7FFF) % 4096;
302 		temp = hashTable[i];
303 		hashTable[i] = hashTable[j];
304 		hashTable[j] = temp;
305 	}
306 
307 	for(i = 0; i < 4096; i++)
308 		hashTable[4096 + i] = hashTable[i];
309 
310 	FASTER_NOISE_INIT();
311 }
312 
313 
314 
315 /*****************************************************************************
316 *
317 * FUNCTION
318 *
319 *   Free_Noise_Tables()
320 *
321 * INPUT
322 *
323 * OUTPUT
324 *
325 * RETURNS
326 *
327 * AUTHOR
328 *
329 *   POV-Ray Team
330 *
331 * DESCRIPTION
332 *
333 * CHANGES
334 *
335 ******************************************************************************/
336 
Free_Noise_Tables()337 void Free_Noise_Tables()
338 {
339 	if (sintab != NULL)
340 	{
341 		POV_FREE(sintab);
342 
343 		sintab       = NULL;
344 
345 #ifdef DYNAMIC_HASHTABLE
346 		POV_FREE(hashTable);
347 		hashTable    = NULL;
348 #endif
349 	}
350 }
351 
352 
353 
354 /*****************************************************************************
355 *
356 * FUNCTION
357 *
358 *   Noise
359 *
360 * INPUT
361 *
362 *   EPoint -- 3-D point at which noise is evaluated
363 *
364 * OUTPUT
365 *
366 * RETURNS
367 *
368 *   DBL noise value
369 *
370 * AUTHOR
371 *
372 *   Robert Skinner based on Ken Perlin
373 *
374 * DESCRIPTION
375 *
376 * CHANGES
377 *   Modified by AAC to ensure uniformly distributed clamped values
378 *   between 0 and 1.0...
379 *
380 *   Feb 8, 2001: modified function based on MegaPov 0.7 to remove
381 *                bugs that showed up when noise was translated.
382 *
383 ******************************************************************************/
384 
OriNoise(const VECTOR EPoint,int noise_generator)385 DBL OriNoise(const VECTOR EPoint, int noise_generator)
386 {
387 	DBL x, y, z;
388 	DBL *mp;
389 	int tmp;
390 	int ix, iy, iz;
391 	int ixiy_hash, ixjy_hash, jxiy_hash, jxjy_hash;
392 
393 	DBL sx, sy, sz, tx, ty, tz;
394 	DBL sum = 0.0;
395 
396 	DBL x_ix, x_jx, y_iy, y_jy, z_iz, z_jz, txty, sxty, txsy, sxsy;
397 
398 	// TODO FIXME - global statistics reference
399 	// Stats[Calls_To_Noise]++;
400 
401 	if (noise_generator==3)
402 	{
403 		// The 1.59 and 0.985 are to correct for some biasing problems with
404 		// the random # generator used to create the noise tables.  Final
405 		// range of values is about 5.0e-4 below 0.0 and above 1.0.  Mean
406 		// value is 0.49 (ideally it would be 0.5).
407 		sum = 0.5 * (1.59 * SolidNoise(EPoint) + 0.985);
408 
409 		// Clamp final value to 0-1 range
410 			if (sum < 0.0) sum = 0.0;
411 			if (sum > 1.0) sum = 1.0;
412 
413 		return sum;
414 	}
415 
416 	x = EPoint[X];
417 	y = EPoint[Y];
418 	z = EPoint[Z];
419 
420 	/* its equivalent integer lattice point. */
421 	/* ix = (int)x; iy = (int)y; iz = (long)z; */
422 	/* JB fix for the range problem */
423 	tmp = (x>=0)?(int)x:(int)(x-(1-EPSILON));
424 	ix = (int)((tmp-MINX)&0xFFF);
425 	x_ix = x-tmp;
426 
427 	tmp = (y>=0)?(int)y:(int)(y-(1-EPSILON));
428 	iy = (int)((tmp-MINY)&0xFFF);
429 	y_iy = y-tmp;
430 
431 	tmp = (z>=0)?(int)z:(int)(z-(1-EPSILON));
432 	iz = (int)((tmp-MINZ)&0xFFF);
433 	z_iz = z-tmp;
434 
435 	x_jx = x_ix-1; y_jy = y_iy-1; z_jz = z_iz-1;
436 
437 	sx = SCURVE(x_ix); sy = SCURVE(y_iy); sz = SCURVE(z_iz);
438 
439 	/* the complement values of sx,sy,sz */
440 	tx = 1 - sx; ty = 1 - sy; tz = 1 - sz;
441 
442 	/*
443 	 *  interpolate!
444 	 */
445 	txty = tx * ty;
446 	sxty = sx * ty;
447 	txsy = tx * sy;
448 	sxsy = sx * sy;
449 	ixiy_hash = Hash2d(ix,     iy);
450 	jxiy_hash = Hash2d(ix + 1, iy);
451 	ixjy_hash = Hash2d(ix,     iy + 1);
452 	jxjy_hash = Hash2d(ix + 1, iy + 1);
453 
454 	mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz)];
455 	sum = INCRSUMP(mp, (txty*tz), x_ix, y_iy, z_iz);
456 
457 	mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz)];
458 	sum += INCRSUMP(mp, (sxty*tz), x_jx, y_iy, z_iz);
459 
460 	mp = &RTable[Hash1dRTableIndex(ixjy_hash, iz)];
461 	sum += INCRSUMP(mp, (txsy*tz), x_ix, y_jy, z_iz);
462 
463 	mp = &RTable[Hash1dRTableIndex(jxjy_hash, iz)];
464 	sum += INCRSUMP(mp, (sxsy*tz), x_jx, y_jy, z_iz);
465 
466 	mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz + 1)];
467 	sum += INCRSUMP(mp, (txty*sz), x_ix, y_iy, z_jz);
468 
469 	mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz + 1)];
470 	sum += INCRSUMP(mp, (sxty*sz), x_jx, y_iy, z_jz);
471 
472 	mp = &RTable[Hash1dRTableIndex(ixjy_hash, iz + 1)];
473 	sum += INCRSUMP(mp, (txsy*sz), x_ix, y_jy, z_jz);
474 
475 	mp = &RTable[Hash1dRTableIndex(jxjy_hash, iz + 1)];
476 	sum += INCRSUMP(mp, (sxsy*sz), x_jx, y_jy, z_jz);
477 
478 	if(noise_generator==2)
479 	{
480 		/* details of range here:
481 		Min, max: -1.05242, 0.988997
482 		Mean: -0.0191481, Median: -0.535493, Std Dev: 0.256828
483 
484 		We want to change it to as close to [0,1] as possible.
485 		*/
486 		sum += 1.05242;
487 		sum *= 0.48985582;
488 		/*sum *= 0.5;
489 			sum += 0.5;*/
490 
491 		if (sum < 0.0)
492 			sum = 0.0;
493 		if (sum > 1.0)
494 			sum = 1.0;
495 	}
496 	else
497 	{
498 		sum = sum + 0.5;                     /* range at this point -0.5 - 0.5... */
499 
500 		if (sum < 0.0)
501 			sum = 0.0;
502 		if (sum > 1.0)
503 			sum = 1.0;
504 	}
505 
506 	return (sum);
507 }
508 
509 
510 
511 /*****************************************************************************
512 *
513 * FUNCTION
514 *
515 *   DNoise
516 *
517 * INPUT
518 *
519 *   EPoint -- 3-D point at which noise is evaluated
520 *
521 * OUTPUT
522 *
523 *   VECTOR result
524 *
525 * RETURNS
526 *
527 * AUTHOR
528 *
529 *   Robert Skinner based on Ken Perlin
530 *
531 * DESCRIPTION
532 *   Vector-valued version of "Noise"
533 *
534 * CHANGES
535 *   Modified by AAC to ensure uniformly distributed clamped values
536 *   between 0 and 1.0...
537 *
538 *   Feb 8, 2001: modified function based on MegaPov 0.7 to remove
539 *                bugs that showed up when noise was translated.
540 *
541 ******************************************************************************/
542 
OriDNoise(VECTOR result,const VECTOR EPoint)543 void OriDNoise(VECTOR result, const VECTOR EPoint)
544 {
545 	DBL x, y, z;
546 	DBL *mp;
547 	int tmp;
548 	int ix, iy, iz;
549 	int ixiy_hash, ixjy_hash, jxiy_hash, jxjy_hash;
550 	DBL x_ix, x_jx, y_iy, y_jy, z_iz, z_jz;
551 	DBL s;
552 	DBL sx, sy, sz, tx, ty, tz;
553 	DBL txty, sxty, txsy, sxsy;
554 
555 	// TODO FIXME - global statistics reference
556 	// Stats[Calls_To_DNoise]++;
557 
558 	x = EPoint[X];
559 	y = EPoint[Y];
560 	z = EPoint[Z];
561 
562 	/* its equivalent integer lattice point. */
563 	/*ix = (int)x; iy = (int)y; iz = (int)z;
564 	x_ix = x - ix; y_iy = y - iy; z_iz = z - iz;*/
565 	            /* JB fix for the range problem */
566 	tmp = (x>=0)?(int)x:(int)(x-(1-EPSILON));
567 	ix = (int)((tmp-MINX)&0xFFF);
568 	x_ix = x-tmp;
569 
570 	tmp = (y>=0)?(int)y:(int)(y-(1-EPSILON));
571 	iy = (int)((tmp-MINY)&0xFFF);
572 	y_iy = y-tmp;
573 
574 	tmp = (z>=0)?(int)z:(int)(z-(1-EPSILON));
575 	iz = (int)((tmp-MINZ)&0xFFF);
576 	z_iz = z-tmp;
577 
578 	x_jx = x_ix-1; y_jy = y_iy-1; z_jz = z_iz-1;
579 
580 	sx = SCURVE(x_ix); sy = SCURVE(y_iy); sz = SCURVE(z_iz);
581 
582 	/* the complement values of sx,sy,sz */
583 	tx = 1 - sx; ty = 1 - sy; tz = 1 - sz;
584 
585 	/*
586 	 *  interpolate!
587 	 */
588 	txty = tx * ty;
589 	sxty = sx * ty;
590 	txsy = tx * sy;
591 	sxsy = sx * sy;
592 	ixiy_hash = Hash2d(ix,     iy);
593 	jxiy_hash = Hash2d(ix + 1, iy);
594 	ixjy_hash = Hash2d(ix,     iy + 1);
595 	jxjy_hash = Hash2d(ix + 1, iy + 1);
596 
597 	mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz)];
598 	s = txty*tz;
599 	result[X] = INCRSUMP(mp, s, x_ix, y_iy, z_iz);
600 	mp += 8;
601 	result[Y] = INCRSUMP(mp, s, x_ix, y_iy, z_iz);
602 	mp += 8;
603 	result[Z] = INCRSUMP(mp, s, x_ix, y_iy, z_iz);
604 
605 	mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz)];
606 	s = sxty*tz;
607 	result[X] += INCRSUMP(mp, s, x_jx, y_iy, z_iz);
608 	mp += 8;
609 	result[Y] += INCRSUMP(mp, s, x_jx, y_iy, z_iz);
610 	mp += 8;
611 	result[Z] += INCRSUMP(mp, s, x_jx, y_iy, z_iz);
612 
613 	mp = &RTable[Hash1dRTableIndex(jxjy_hash, iz)];
614 	s = sxsy*tz;
615 	result[X] += INCRSUMP(mp, s, x_jx, y_jy, z_iz);
616 	mp += 8;
617 	result[Y] += INCRSUMP(mp, s, x_jx, y_jy, z_iz);
618 	mp += 8;
619 	result[Z] += INCRSUMP(mp, s, x_jx, y_jy, z_iz);
620 
621 	mp = &RTable[Hash1dRTableIndex(ixjy_hash, iz)];
622 	s = txsy*tz;
623 	result[X] += INCRSUMP(mp, s, x_ix, y_jy, z_iz);
624 	mp += 8;
625 	result[Y] += INCRSUMP(mp, s, x_ix, y_jy, z_iz);
626 	mp += 8;
627 	result[Z] += INCRSUMP(mp, s, x_ix, y_jy, z_iz);
628 
629 	mp = &RTable[Hash1dRTableIndex(ixjy_hash, iz + 1)];
630 	s = txsy*sz;
631 	result[X] += INCRSUMP(mp, s, x_ix, y_jy, z_jz);
632 	mp += 8;
633 	result[Y] += INCRSUMP(mp, s, x_ix, y_jy, z_jz);
634 	mp += 8;
635 	result[Z] += INCRSUMP(mp, s, x_ix, y_jy, z_jz);
636 
637 	mp = &RTable[Hash1dRTableIndex(jxjy_hash, iz + 1)];
638 	s = sxsy*sz;
639 	result[X] += INCRSUMP(mp, s, x_jx, y_jy, z_jz);
640 	mp += 8;
641 	result[Y] += INCRSUMP(mp, s, x_jx, y_jy, z_jz);
642 	mp += 8;
643 	result[Z] += INCRSUMP(mp, s, x_jx, y_jy, z_jz);
644 
645 	mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz + 1)];
646 	s = sxty*sz;
647 	result[X] += INCRSUMP(mp, s, x_jx, y_iy, z_jz);
648 	mp += 8;
649 	result[Y] += INCRSUMP(mp, s, x_jx, y_iy, z_jz);
650 	mp += 8;
651 	result[Z] += INCRSUMP(mp, s, x_jx, y_iy, z_jz);
652 
653 	mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz + 1)];
654 	s = txty*sz;
655 	result[X] += INCRSUMP(mp, s, x_ix, y_iy, z_jz);
656 	mp += 8;
657 	result[Y] += INCRSUMP(mp, s, x_ix, y_iy, z_jz);
658 	mp += 8;
659 	result[Z] += INCRSUMP(mp, s, x_ix, y_iy, z_jz);
660 }
661 
662 // Note that the value of NoiseEntries must be a power of 2.  This
663 // is because bit masking using (NoiseEntries-1) is used to rescale
664 // the input values to the noise function.
665 const int NoiseEntries = 2048;
666 static int NoisePermutation[2*(NoiseEntries+1)];
667 static VECTOR NoiseGradients[2*(NoiseEntries+1)];
668 
669 const DBL ROLLOVER = 10000000.023157213;
670 
671 static void
InitSolidNoise(void)672 InitSolidNoise(void)
673 {
674 	int i, j, k;
675 	VECTOR v;
676 	DBL s;
677 
678 	// Create an array of random gradient vectors uniformly on the unit sphere
679 	int next_rand = 1;
680 	for(i = 0; i < NoiseEntries; i++)
681 	{
682 		do
683 		{
684 			for(j = 0; j < 3; j++)
685 			{
686 				next_rand = next_rand * 1812433253L + 12345L;
687 				v[j] = (DBL)((((int)(next_rand >> 16) & 0x7FFF) % (NoiseEntries << 1)) - NoiseEntries) / (DBL)NoiseEntries;
688 			}
689 			s = VSumSqr(v);
690 		} while ((s > 1.0) || (s < 1.0e-5));
691 		s = 1.0 / sqrt(s);
692 		VScaleEq(v, s);
693 
694 		Assign_Vector(NoiseGradients[i], v);
695 	}
696 	// Create a pseudorandom permutation of [0..NoiseEntries]
697 	for(i = 0; i < NoiseEntries; i++)
698 		NoisePermutation[i] = i;
699 	for(i = NoiseEntries; i > 0; i -= 2)
700 	{
701 		k = NoisePermutation[i];
702 		next_rand = next_rand * 1812433253L + 12345L;
703 		j = ((int)(next_rand >> 16) & 0x7FFF) % NoiseEntries;
704 		NoisePermutation[i] = NoisePermutation[j];
705 		NoisePermutation[j] = k;
706 	}
707 	// Duplicate the entries so that we don't need a modulus operation
708 	// to get a value out.
709 	for(i = 0; i < NoiseEntries + 2; i++)
710 	{
711 		NoisePermutation[NoiseEntries + i] = NoisePermutation[i];
712 		Assign_Vector(NoiseGradients[NoiseEntries+i], NoiseGradients[i]);
713 	}
714 }
715 
716 // Hermite curve from 0 to 1.  Makes a nice smooth transition of values.
717 static DBL inline
SCurve(DBL t)718 SCurve(DBL t)
719 {
720 	return (t * t * (3.0 - 2.0 * t));
721 }
722 
723 
724 // Linear interpolation between a and b, as the value of t goes from 0 to 1.
725 static DBL inline
Lerp(DBL t,DBL a,DBL b)726 Lerp(DBL t, DBL a, DBL b)
727 {
728 	return ((a) + (t) * ((b) - (a)));
729 }
730 
731 // Linear interpolation between a and b, as the value of t goes from 0 to 1.
732 static void inline
VLerp(VECTOR v,DBL t,const VECTOR a,const VECTOR b)733 VLerp(VECTOR v, DBL t, const VECTOR a, const VECTOR b)
734 {
735 	v[X] = Lerp(t, a[X], b[X]);
736 	v[Y] = Lerp(t, a[Y], b[Y]);
737 	v[Z] = Lerp(t, a[Z], b[Z]);
738 }
739 
740 static void inline
SetupSolidNoise(const VECTOR P,int i,int & b0,int & b1,DBL & r0,DBL & r1)741 SetupSolidNoise(const VECTOR P, int i, int &b0, int &b1, DBL &r0, DBL &r1)
742 {
743 	DBL t = P[i] + ROLLOVER;
744 
745 	int it = (int)floor(t);
746 	b0 = it & (NoiseEntries - 1);
747 	b1 = (b0 + 1) & (NoiseEntries - 1);
748 	r0 = t - it;
749 	r1 = r0 - 1.0;
750 }
751 
752 static DBL inline
NoiseValueAt(const VECTOR q,DBL rx,DBL ry,DBL rz)753 NoiseValueAt(const VECTOR q, DBL rx, DBL ry, DBL rz)
754 {
755 	return (rx * q[X] + ry * q[Y] + rz * q[Z]);
756 }
757 
758 static DBL
SolidNoise(const VECTOR P)759 SolidNoise(const VECTOR P)
760 {
761 	int bx0, bx1, by0, by1, bz0, bz1;
762 	int b00, b10, b01, b11;
763 	DBL rx0, rx1, ry0, ry1, rz0, rz1;
764 	DBL sx, sy, sz, a, b, c, d, t, u, v;
765 	int i, j;
766 
767 	SetupSolidNoise(P, 0, bx0, bx1, rx0, rx1);
768 	SetupSolidNoise(P, 1, by0, by1, ry0, ry1);
769 	SetupSolidNoise(P, 2, bz0, bz1, rz0, rz1);
770 
771 	i = NoisePermutation[bx0];
772 	j = NoisePermutation[bx1];
773 
774 	b00 = NoisePermutation[i + by0];
775 	b10 = NoisePermutation[j + by0];
776 	b01 = NoisePermutation[i + by1];
777 	b11 = NoisePermutation[j + by1];
778 
779 	sx = SCurve(rx0);
780 	sy = SCurve(ry0);
781 	sz = SCurve(rz0);
782 
783 	u = NoiseValueAt(NoiseGradients[b00 + bz0], rx0, ry0, rz0);
784 	v = NoiseValueAt(NoiseGradients[b10 + bz0], rx1, ry0, rz0);
785 	a = Lerp(sx, u, v);
786 
787 	u = NoiseValueAt(NoiseGradients[b01 + bz0], rx0, ry1, rz0);
788 	v = NoiseValueAt(NoiseGradients[b11 + bz0], rx1, ry1, rz0);
789 	b = Lerp(sx, u, v);
790 
791 	c = Lerp(sy, a, b);
792 
793 	u = NoiseValueAt(NoiseGradients[b00 + bz1], rx0, ry0, rz1);
794 	v = NoiseValueAt(NoiseGradients[b10 + bz1], rx1, ry0, rz1);
795 	a = Lerp(sx, u, v);
796 
797 	u = NoiseValueAt(NoiseGradients[b01 + bz1], rx0, ry1, rz1);
798 	v = NoiseValueAt(NoiseGradients[b11 + bz1], rx1, ry1, rz1);
799 	b = Lerp(sx, u, v);
800 
801 	d = Lerp(sy, a, b);
802 
803 	t = Lerp(sz, c, d);
804 
805 	return t;
806 }
807 
808 static void
SolidDNoise(const VECTOR P,VECTOR D)809 SolidDNoise(const VECTOR P, VECTOR D)
810 {
811 	int bx0, bx1, by0, by1, bz0, bz1;
812 	int b00, b10, b01, b11;
813 	DBL rx0, rx1, ry0, ry1, rz0, rz1;
814 	DBL sx, sy, sz;
815 	VECTOR a, b, c, d, u, v;
816 	int i, j;
817 
818 	SetupSolidNoise(P, 0, bx0, bx1, rx0, rx1);
819 	SetupSolidNoise(P, 1, by0, by1, ry0, ry1);
820 	SetupSolidNoise(P, 2, bz0, bz1, rz0, rz1);
821 
822 	i = NoisePermutation[bx0];
823 	j = NoisePermutation[bx1];
824 
825 	b00 = NoisePermutation[i + by0];
826 	b10 = NoisePermutation[j + by0];
827 	b01 = NoisePermutation[i + by1];
828 	b11 = NoisePermutation[j + by1];
829 
830 	sx = SCurve(rx0);
831 	sy = SCurve(ry0);
832 	sz = SCurve(rz0);
833 
834 
835 	Assign_Vector(u, NoiseGradients[b00 + bz0]);
836 	Assign_Vector(v, NoiseGradients[b10 + bz0]);
837 	VLerp(a, sx, u, v);
838 
839 	Assign_Vector(u, NoiseGradients[b01 + bz0]);
840 	Assign_Vector(v, NoiseGradients[b11 + bz0]);
841 	VLerp(b, sx, u, v);
842 
843 	VLerp(c, sy, a, b);
844 
845 	Assign_Vector(u, NoiseGradients[b00 + bz1]);
846 	Assign_Vector(v, NoiseGradients[b10 + bz1]);
847 	VLerp(a, sx, u, v);
848 
849 	Assign_Vector(u, NoiseGradients[b01 + bz1]);
850 	Assign_Vector(v, NoiseGradients[b11 + bz1]);
851 	VLerp(b, sx, u, v);
852 
853 	VLerp(d, sy, a, b);
854 
855 	VLerp(D, sz, c, d);
856 }
857 
858 
859 /*****************************************************************************
860 *
861 * FUNCTION
862 *
863 *   Turbulence
864 *
865 * INPUT
866 *
867 *   EPoint -- Point at which turb is evaluated.
868 *   Turb   -- Parameters for fbm calculations.
869 *
870 * OUTPUT
871 *
872 * RETURNS
873 *
874 *   DBL result
875 *
876 * AUTHOR
877 *
878 *   POV-Ray Team
879 *
880 * DESCRIPTION   : Computes a Fractal Brownian Motion turbulence value
881 *                 using repeated calls to a Perlin Noise function.
882 *
883 * CHANGES
884 *   ??? ???? : Updated with varible Octaves, Lambda, & Omega by [DMF]
885 *
886 ******************************************************************************/
887 
Turbulence(const VECTOR EPoint,const TURB * Turb,int noise_generator)888 DBL Turbulence(const VECTOR EPoint, const TURB *Turb, int noise_generator)
889 {
890 	int i;
891 	DBL Lambda, Omega, l, o, value;
892 	VECTOR temp;
893 	int Octaves=Turb->Octaves;
894 
895 	if (noise_generator>1)
896 	{
897 		value = (2.0 * Noise(EPoint, noise_generator) - 0.5);
898 		value = min(max(value,0.0),1.0);
899 	} else {
900 		value = Noise(EPoint, noise_generator);
901 	}
902 
903 	l = Lambda = Turb->Lambda;
904 	o = Omega  = Turb->Omega;
905 
906 	for (i = 2; i <= Octaves; i++)
907 	{
908 		VScale(temp,EPoint,l);
909 		if (noise_generator>1)
910 			value += o * (2.0 * Noise(temp, noise_generator) - 0.5);
911 	else
912 		value += o * Noise(temp, noise_generator);
913 		if (i < Octaves)
914 		{
915 			l *= Lambda;
916 			o *= Omega;
917 		}
918 	}
919 	return (value);
920 }
921 
922 
923 
924 /*****************************************************************************
925 *
926 * FUNCTION
927 *
928 *   DTurbulence
929 *
930 * INPUT
931 *
932 *   EPoint -- Point at which turb is evaluated.
933 *   Turb   -- Parameters for fmb calculations.
934 *
935 * OUTPUT
936 *
937 *   result -- Vector valued turbulence
938 *
939 * RETURNS
940 *
941 * AUTHOR
942 *
943 *   POV-Ray Team
944 *
945 * DESCRIPTION   : Computes a Fractal Brownian Motion turbulence value
946 *                 using repeated calls to a Perlin DNoise function.
947 *
948 * CHANGES
949 *   ??? ???? : Updated with varible Octaves, Lambda, & Omega by [DMF]
950 *
951 ******************************************************************************/
952 
953 
DTurbulence(VECTOR result,const VECTOR EPoint,const TURB * Turb)954 void DTurbulence(VECTOR result, const VECTOR EPoint, const TURB *Turb)
955 {
956 	DBL Omega, Lambda;
957 	int i;
958 	DBL l, o;
959 	VECTOR value, temp;
960 	int Octaves=Turb->Octaves;
961 
962 	result[X] = result[Y] = result[Z] = 0.0;
963 	value[X]  = value[Y]  = value[Z]  = 0.0;
964 
965 	DNoise(result, EPoint);
966 
967 	l = Lambda = Turb->Lambda;
968 	o = Omega  = Turb->Omega;
969 
970 	for (i = 2; i <= Octaves; i++)
971 	{
972 		VScale(temp,EPoint,l);
973 
974 		DNoise(value, temp);
975 		result[X] += o * value[X];
976 		result[Y] += o * value[Y];
977 		result[Z] += o * value[Z];
978 		if (i < Octaves)
979 		{
980 			l *= Lambda;
981 			o *= Omega;
982 		}
983 	}
984 }
985 
986 
987 
988 /*****************************************************************************
989 *
990 * FUNCTION
991 *
992 *   cycloidal
993 *
994 * INPUT
995 *
996 *   DBL value
997 *
998 * OUTPUT
999 *
1000 * RETURNS
1001 *
1002 *   DBL result
1003 *
1004 * AUTHOR
1005 *
1006 *   POV-Ray Team
1007 *
1008 * DESCRIPTION
1009 *
1010 * CHANGES
1011 *
1012 ******************************************************************************/
1013 
cycloidal(DBL value)1014 DBL cycloidal(DBL value)
1015 {
1016 	if (value >= 0.0)
1017 	{
1018 		return sin((DBL) (((value - floor(value)) * 50000.0)) / 50000.0 * TWO_M_PI)  ;
1019 	}
1020 	else
1021 	{
1022 		return 0.0-sin((DBL) (((0.0 - (value + floor(0.0 - value))) * 50000.0)) / 50000.0 * TWO_M_PI);
1023 	}
1024 }
1025 
1026 
1027 
1028 /*****************************************************************************
1029 *
1030 * FUNCTION
1031 *
1032 *   Triangle_Wave
1033 *
1034 * INPUT
1035 *
1036 *   DBL value
1037 *
1038 * OUTPUT
1039 *
1040 * RETURNS
1041 *
1042 *   DBL result
1043 *
1044 * AUTHOR
1045 *
1046 *   POV-Ray Team
1047 *
1048 * DESCRIPTION
1049 *
1050 * CHANGES
1051 *
1052 ******************************************************************************/
1053 
Triangle_Wave(DBL value)1054 DBL Triangle_Wave(DBL value)
1055 {
1056 	DBL offset;
1057 
1058 	if (value >= 0.0)
1059 	{
1060 		offset = value - floor(value);
1061 	}
1062 	else
1063 	{
1064 		offset = value + 1.0 + floor(fabs(value));
1065 	}
1066 	if (offset >= 0.5)
1067 	{
1068 		return (2.0 * (1.0 - offset));
1069 	}
1070 	else
1071 	{
1072 		return (2.0 * offset);
1073 	}
1074 }
1075 
1076 
1077 
1078 /*****************************************************************************
1079 *
1080 * FUNCTION
1081 *
1082 * INPUT
1083 *
1084 * OUTPUT
1085 *
1086 * RETURNS
1087 *
1088 * AUTHOR
1089 *
1090 *   POV-Ray Team
1091 *
1092 * DESCRIPTION
1093 *
1094 * CHANGES
1095 *
1096 ******************************************************************************/
1097 
Transform_Textures(TEXTURE * Textures,const TRANSFORM * Trans)1098 void Transform_Textures(TEXTURE *Textures, const TRANSFORM *Trans)
1099 {
1100 	TEXTURE *Layer;
1101 
1102 	for (Layer = Textures; Layer != NULL; Layer = reinterpret_cast<TEXTURE *>(Layer->Next))
1103 	{
1104 		if (Layer->Type == PLAIN_PATTERN)
1105 		{
1106 			Transform_Tpattern(reinterpret_cast<TPATTERN *>(Layer->Pigment), Trans);
1107 			Transform_Tpattern(reinterpret_cast<TPATTERN *>(Layer->Tnormal), Trans);
1108 		}
1109 		else
1110 		{
1111 			Transform_Tpattern(reinterpret_cast<TPATTERN *>(Layer), Trans);
1112 		}
1113 	}
1114 }
1115 
1116 
1117 
1118 /*****************************************************************************
1119 *
1120 * FUNCTION
1121 *
1122 * INPUT
1123 *
1124 * OUTPUT
1125 *
1126 * RETURNS
1127 *
1128 * AUTHOR
1129 *
1130 *   POV-Ray Team
1131 *
1132 * DESCRIPTION
1133 *
1134 * CHANGES
1135 *   6/27/98  MBP  Added initializers for reflection blur
1136 *   8/27/98  MBP  Added initializers for angle-based reflectivity
1137 *
1138 ******************************************************************************/
1139 
Create_Finish()1140 FINISH *Create_Finish()
1141 {
1142 	FINISH *New;
1143 
1144 	New = reinterpret_cast<FINISH *>(POV_MALLOC(sizeof (FINISH), "finish"));
1145 
1146 	New->Ambient.set(0.1);
1147 	New->Emission.clear();
1148 	New->Reflection_Max.clear();
1149 	New->Reflection_Min.clear();
1150 
1151 	New->Reflection_Type    = 0;
1152 	New->Reflection_Falloff = 1;    /* Added by MBP 8/27/98 */
1153 	New->Diffuse            = 0.6;
1154 	New->DiffuseBack        = 0.0;
1155 	New->RawDiffuse         = New->Diffuse;
1156 	New->RawDiffuseBack     = New->DiffuseBack;
1157 	New->Brilliance         = 1.0;
1158 	New->Phong              = 0.0;
1159 	New->Phong_Size         = 40.0;
1160 	New->Specular           = 0.0;
1161 	New->Roughness          = 1.0 / 0.05;
1162 
1163 	New->Crand = 0.0;
1164 
1165 	New->Metallic = 0.0;
1166 
1167 	New->Irid                = 0.0;
1168 	New->Irid_Film_Thickness = 0.0;
1169 	New->Irid_Turb           = 0.0;
1170 	New->Temp_Caustics = -1.0;
1171 	New->Temp_IOR     = -1.0;
1172 	New->Temp_Dispersion  = 1.0;
1173 	New->Temp_Refract =  1.0;
1174 	New->Reflect_Exp  =  1.0;
1175 	New->Reflect_Metallic = 0.0;
1176 	/* Added Dec 19 1999 by NK */
1177 	New->Conserve_Energy = false;
1178 
1179 	New->UseSubsurface = false;
1180 	New->SubsurfaceTranslucency.clear();
1181 	New->SubsurfaceAnisotropy.clear();
1182 
1183 	return(New);
1184 }
1185 
1186 
1187 
1188 /*****************************************************************************
1189 *
1190 * FUNCTION
1191 *
1192 * INPUT
1193 *
1194 * OUTPUT
1195 *
1196 * RETURNS
1197 *
1198 * AUTHOR
1199 *
1200 *   POV-Ray Team
1201 *
1202 * DESCRIPTION
1203 *
1204 * CHANGES
1205 *
1206 ******************************************************************************/
1207 
Copy_Finish(const FINISH * Old)1208 FINISH *Copy_Finish(const FINISH *Old)
1209 {
1210 	FINISH *New;
1211 
1212 	if (Old != NULL)
1213 	{
1214 		New = Create_Finish();
1215 		*New = *Old;
1216 	}
1217 	else
1218 		New = NULL;
1219 	return (New);
1220 }
1221 
1222 
1223 
1224 /*****************************************************************************
1225 *
1226 * FUNCTION
1227 *
1228 * INPUT
1229 *
1230 * OUTPUT
1231 *
1232 * RETURNS
1233 *
1234 * AUTHOR
1235 *
1236 *   POV-Ray Team
1237 *
1238 * DESCRIPTION
1239 *
1240 * CHANGES
1241 *
1242 ******************************************************************************/
1243 
Create_Texture()1244 TEXTURE *Create_Texture()
1245 {
1246 	TEXTURE *New;
1247 
1248 	New = reinterpret_cast<TEXTURE *>(POV_MALLOC(sizeof (TEXTURE), "texture"));
1249 
1250 	Init_TPat_Fields(reinterpret_cast<TPATTERN *>(New));
1251 
1252 	New->References = 1;
1253 
1254 	New->Type    = PLAIN_PATTERN;
1255 	New->Flags  |= NO_FLAGS; // [CLi] Already initialized by Init_TPat_Fields
1256 
1257 	New->Pigment = NULL;
1258 	New->Tnormal = NULL;
1259 	New->Finish  = NULL;
1260 
1261 	New->Next          = NULL;
1262 	New->Next_Material = NULL;
1263 
1264 	New->Materials = NULL;
1265 	New->Num_Of_Mats = 0;
1266 
1267 	return (New);
1268 }
1269 
1270 
1271 /*****************************************************************************
1272 *
1273 * FUNCTION
1274 *
1275 * INPUT
1276 *
1277 * OUTPUT
1278 *
1279 * RETURNS
1280 *
1281 * AUTHOR
1282 *
1283 *   POV-Ray Team
1284 *
1285 * DESCRIPTION
1286 *
1287 * CHANGES
1288 *
1289 ******************************************************************************/
1290 
Copy_Texture_Pointer(TEXTURE * Texture)1291 TEXTURE *Copy_Texture_Pointer(TEXTURE *Texture)
1292 {
1293 	if (Texture != NULL)
1294 	{
1295 		Texture->References++;
1296 	}
1297 
1298 	return(Texture);
1299 }
1300 
1301 
1302 
1303 
1304 /*****************************************************************************
1305 *
1306 * FUNCTION
1307 *
1308 * INPUT
1309 *
1310 * OUTPUT
1311 *
1312 * RETURNS
1313 *
1314 * AUTHOR
1315 *
1316 *   POV-Ray Team
1317 *
1318 * DESCRIPTION
1319 *
1320 * CHANGES
1321 *
1322 ******************************************************************************/
1323 
Copy_Textures(const TEXTURE * Textures)1324 TEXTURE *Copy_Textures(const TEXTURE *Textures)
1325 {
1326 	TEXTURE *New, *First, *Previous;
1327 	const TEXTURE *Layer;
1328 
1329 	Previous = First = NULL;
1330 
1331 	for (Layer = Textures; Layer != NULL; Layer = reinterpret_cast<const TEXTURE *>(Layer->Next))
1332 	{
1333 		New = Create_Texture();
1334 		Copy_TPat_Fields (reinterpret_cast<TPATTERN *>(New), reinterpret_cast<const TPATTERN *>(Layer));
1335 
1336 		/*  Mesh copies a texture pointer that already has multiple
1337 		    references.  We just want a clean copy, not a copy
1338 		    that's multply referenced.
1339 		 */
1340 
1341 		New->References = 1;
1342 
1343 		switch (Layer->Type)
1344 		{
1345 			case PLAIN_PATTERN:
1346 				New->Pigment = Copy_Pigment(Layer->Pigment);
1347 				New->Tnormal = Copy_Tnormal(Layer->Tnormal);
1348 				New->Finish  = Copy_Finish(Layer->Finish);
1349 
1350 				break;
1351 
1352 			case BITMAP_PATTERN:
1353 
1354 				New->Materials   = Copy_Materials(Layer->Materials);
1355 				New->Num_Of_Mats = Layer->Num_Of_Mats;
1356 
1357 //				Not needed. Copied by Copy_TPat_Fields:
1358 //				New->Vals.Image  = Copy_Image(Layer->Vals.Image);
1359 
1360 				break;
1361 		}
1362 
1363 		if (First == NULL)
1364 		{
1365 			First = New;
1366 		}
1367 
1368 		if (Previous != NULL)
1369 		{
1370 			Previous->Next = reinterpret_cast<TPATTERN *>(New);
1371 		}
1372 
1373 		Previous = New;
1374 	}
1375 
1376 	return (First);
1377 }
1378 
1379 
1380 
1381 /*****************************************************************************
1382 *
1383 * FUNCTION
1384 *
1385 * INPUT
1386 *
1387 * OUTPUT
1388 *
1389 * RETURNS
1390 *
1391 * AUTHOR
1392 *
1393 *   POV-Ray Team
1394 *
1395 * DESCRIPTION
1396 *
1397 * CHANGES
1398 *
1399 ******************************************************************************/
1400 
Copy_Materials(TEXTURE * Old)1401 static TEXTURE *Copy_Materials(TEXTURE *Old)
1402 {
1403 	TEXTURE *New, *First, *Previous, *Material;
1404 
1405 	Previous = First = NULL;
1406 
1407 	for (Material = Old; Material != NULL; Material = Material->Next_Material)
1408 	{
1409 		New = Copy_Textures(Material);
1410 
1411 		if (First == NULL)
1412 		{
1413 			First = New;
1414 		}
1415 
1416 		if (Previous != NULL)
1417 		{
1418 			Previous->Next_Material = New;
1419 		}
1420 
1421 		Previous = New;
1422 	}
1423 
1424 	return (First);
1425 }
1426 
1427 
1428 
1429 /*****************************************************************************
1430 *
1431 * FUNCTION
1432 *
1433 * INPUT
1434 *
1435 * OUTPUT
1436 *
1437 * RETURNS
1438 *
1439 * AUTHOR
1440 *
1441 *   POV-Ray Team
1442 *
1443 * DESCRIPTION
1444 *
1445 * CHANGES
1446 *
1447 ******************************************************************************/
1448 
Destroy_Textures(TEXTURE * Textures)1449 void Destroy_Textures(TEXTURE *Textures)
1450 {
1451 	TEXTURE *Layer = Textures;
1452 	TEXTURE *Mats;
1453 	TEXTURE *Temp;
1454 
1455 	if ((Textures == NULL) || (--(Textures->References) > 0))
1456 	{
1457 		return;
1458 	}
1459 
1460 	while (Layer != NULL)
1461 	{
1462 		Mats = Layer->Next_Material;
1463 
1464 		while (Mats != NULL)
1465 		{
1466 			Temp = Mats->Next_Material;
1467 			Mats->Next_Material = NULL;
1468 			Destroy_Textures(Mats);
1469 			Mats = Temp;
1470 		}
1471 
1472 		Destroy_TPat_Fields(reinterpret_cast<TPATTERN *>(Layer));
1473 
1474 		switch (Layer->Type)
1475 		{
1476 			case PLAIN_PATTERN:
1477 
1478 				Destroy_Pigment(Layer->Pigment);
1479 				Destroy_Tnormal(Layer->Tnormal);
1480 				Destroy_Finish(Layer->Finish);
1481 
1482 			break;
1483 
1484 
1485 			case BITMAP_PATTERN:
1486 
1487 				Destroy_Textures(Layer->Materials);
1488 				/*taken care of by Destroy_TPat_Fields*/
1489 				/*Destroy_Image(Layer->Vals.Image);*/
1490 
1491 			break;
1492 		}
1493 
1494 		Temp = reinterpret_cast<TEXTURE *>(Layer->Next);
1495 		POV_FREE(Layer);
1496 		Layer = Temp;
1497 	}
1498 }
1499 
1500 
1501 
1502 /*****************************************************************************
1503 *
1504 * FUNCTION
1505 *
1506 * INPUT
1507 *
1508 * OUTPUT
1509 *
1510 * RETURNS
1511 *
1512 * AUTHOR
1513 *
1514 *   POV-Ray Team
1515 *
1516 * DESCRIPTION
1517 *
1518 * CHANGES
1519 *
1520 ******************************************************************************/
1521 
Post_Textures(TEXTURE * Textures)1522 void Post_Textures(TEXTURE *Textures)
1523 {
1524 	TEXTURE *Layer, *Material;
1525 	int i;
1526 	BLEND_MAP *Map;
1527 
1528 	if (Textures == NULL)
1529 	{
1530 		return;
1531 	}
1532 
1533 	for (Layer = Textures; Layer != NULL; Layer = reinterpret_cast<TEXTURE *>(Layer->Next))
1534 	{
1535 		if (!((Layer->Flags) & POST_DONE))
1536 		{
1537 			switch (Layer->Type)
1538 			{
1539 				case PLAIN_PATTERN:
1540 
1541 					if(Layer->Tnormal)
1542 					{
1543 						Layer->Tnormal->Flags |=
1544 							(Layer->Flags & DONT_SCALE_BUMPS_FLAG);
1545 					}
1546 					Post_Pigment(Layer->Pigment);
1547 					Post_Tnormal(Layer->Tnormal);
1548 
1549 					break;
1550 
1551 				case BITMAP_PATTERN:
1552 
1553 					for (Material = Layer->Materials; Material != NULL; Material = Material->Next_Material)
1554 
1555 						Post_Textures(Material);
1556 
1557 						break;
1558 			}
1559 
1560 			if ((Map=Layer->Blend_Map) != NULL)
1561 			{
1562 				for (i = 0; i < Map->Number_Of_Entries; i++)
1563 				{
1564 					Map->Blend_Map_Entries[i].Vals.Texture->Flags |=
1565 						(Layer->Flags & DONT_SCALE_BUMPS_FLAG);
1566 					Post_Textures(Map->Blend_Map_Entries[i].Vals.Texture);
1567 				}
1568 			}
1569 			else
1570 			{
1571 				if (Layer->Type == AVERAGE_PATTERN)
1572 				{
1573 					throw POV_EXCEPTION_STRING("No texture map in averaged texture.");
1574 				}
1575 			}
1576 		}
1577 	}
1578 }
1579 
1580 
1581 
1582 /*****************************************************************************
1583 *
1584 * FUNCTION
1585 *
1586 *   Test_Opacity
1587 *
1588 * INPUT
1589 *
1590 *   Object - Pointer to object
1591 *
1592 * OUTPUT
1593 *
1594 * RETURNS
1595 *
1596 *   int - true, if opaque
1597 *
1598 * AUTHOR
1599 *
1600 *   Dieter Bayer
1601 *
1602 * DESCRIPTION
1603 *
1604 *   Test wether an object is opaque or not, i.e. wether the texture contains
1605 *   a non-zero filter or alpha channel.
1606 *
1607 * CHANGES
1608 *
1609 *   Aug 1994 : Creation.
1610 *
1611 *   Oct 1994 : Added code to check for opaque image maps. [DB]
1612 *
1613 *   Jun 1995 : Added code to check for alpha channel image maps. [DB]
1614 *
1615 ******************************************************************************/
1616 
Test_Opacity(const TEXTURE * Texture)1617 int Test_Opacity(const TEXTURE *Texture)
1618 {
1619 	int Opaque, Help;
1620 	const TEXTURE *Layer, *Material;
1621 
1622 	if (Texture == NULL)
1623 	{
1624 		return(false);
1625 	}
1626 
1627 	/* We assume that the object is not opaque. */
1628 
1629 	Opaque = false;
1630 
1631 	/* Test all layers. If at least one layer is opaque the object is opaque. */
1632 
1633 	for (Layer = Texture; Layer != NULL; Layer = reinterpret_cast<const TEXTURE *>(Layer->Next))
1634 	{
1635 		switch (Layer->Type)
1636 		{
1637 			case PLAIN_PATTERN:
1638 
1639 				/* Test image map for opacity. */
1640 
1641 				if (!(Layer->Pigment->Flags & HAS_FILTER))
1642 				{
1643 					Opaque = true;
1644 				}
1645 
1646 				break;
1647 
1648 			case BITMAP_PATTERN:
1649 
1650 				/* Layer is not opaque if the image map is used just once. */
1651 
1652 				if (Layer->Vals.image != NULL)
1653 				{
1654 					if (Layer->Vals.image->Once_Flag)
1655 					{
1656 						break;
1657 					}
1658 				}
1659 
1660 				/* Layer is opaque if all materials are opaque. */
1661 
1662 				Help = true;
1663 
1664 				for (Material = Layer->Materials; Material != NULL; Material = Material->Next_Material)
1665 				{
1666 					if (!Test_Opacity(Material))
1667 					{
1668 						/* Material is not opaque --> layer is not opaque. */
1669 
1670 						Help = false;
1671 
1672 						break;
1673 					}
1674 				}
1675 
1676 				if (Help)
1677 				{
1678 					Opaque = true;
1679 				}
1680 
1681 				break;
1682 		}
1683 	}
1684 
1685 	return(Opaque);
1686 }
1687 
1688 
1689 #if defined(USE_AVX_FMA4_FOR_NOISE)
1690 
1691 /********************************************************************************************/
1692 /* AMD Specific optimizations: Its found that more than 50% of the time is spent in         */
1693 /* Noise and DNoise. These functions have been optimized using AVX and FMA4 instructions    */
1694 /*                                                                                          */
1695 /********************************************************************************************/
1696 DBL (*Noise) (const VECTOR EPoint, int noise_generator);
1697 void (*DNoise) (VECTOR result, const VECTOR EPoint);
1698 
1699 /*****************************************************************************
1700 *
1701 * FUNCTION
1702 *
1703 *   Initialise_NoiseDispatch
1704 *
1705 * INPUT
1706 *
1707 *  None
1708 *
1709 * OUTPUT
1710 *		Initialises the Noise and DNoise Function pointers to the right functions
1711 *
1712 *
1713 * RETURNS
1714 *
1715 *  None
1716 *
1717 * AUTHOR
1718 *
1719 *   AMD
1720 *
1721 * DESCRIPTION
1722 *
1723 * CHANGES
1724 *
1725 *
1726 ******************************************************************************/
1727 
Initialise_NoiseDispatch()1728 void Initialise_NoiseDispatch()
1729 {
1730 	static bool cpu_detected = false;
1731 
1732 	if(!cpu_detected)
1733 	{
1734 			if(CPU_FMA4_DETECT())
1735 			{
1736 				Noise = AVX_FMA4_Noise;
1737 				DNoise = AVX_FMA4_DNoise;
1738 			}
1739 			else
1740 			{
1741 				Noise = OriNoise;
1742 				DNoise = OriDNoise;
1743 			}
1744 
1745 			cpu_detected = true;
1746 	}
1747 }
1748 
1749 
1750 
1751 /*****************************************************************************
1752 *
1753 * FUNCTION
1754 *
1755 *   AVX_FMA4_Noise
1756 *
1757 * INPUT
1758 *
1759 *   EPoint -- 3-D point at which noise is evaluated
1760 *
1761 * OUTPUT
1762 *
1763 * RETURNS
1764 *
1765 *   DBL noise value
1766 *
1767 * AUTHOR
1768 *
1769 *   Robert Skinner based on Ken Perlin
1770 *
1771 * DESCRIPTION
1772 *
1773 * CHANGES
1774 *   Modified by AAC to ensure uniformly distributed clamped values
1775 *   between 0 and 1.0...
1776 *
1777 *   Feb 8, 2001: modified function based on MegaPov 0.7 to remove
1778 *                bugs that showed up when noise was translated.
1779 *
1780 *     Changes Made by AMD
1781 *    July 28, 2011: Re-wrote OriNoise function using AVX and FMA4 intrinsic
1782 *
1783 ******************************************************************************/
1784 
AVX_FMA4_Noise(const VECTOR EPoint,int noise_generator)1785 DBL AVX_FMA4_Noise(const VECTOR EPoint, int noise_generator)
1786 {
1787 	DBL x, y, z;
1788 	DBL *mp;
1789 	int tmp;
1790 	int ix, iy, iz;
1791 	int ixiy_hash, ixjy_hash, jxiy_hash, jxjy_hash;
1792 	DBL x_ix,y_iy, z_iz;
1793 
1794 	DBL sum = 0.0;
1795 
1796 	// TODO FIXME - global statistics reference
1797 	// Stats[Calls_To_Noise]++;
1798 
1799 	if (noise_generator==3)
1800 	{
1801 		// The 1.59 and 0.985 are to correct for some biasing problems with
1802 		// the random # generator used to create the noise tables.  Final
1803 		// range of values is about 5.0e-4 below 0.0 and above 1.0.  Mean
1804 		// value is 0.49 (ideally it would be 0.5).
1805 		sum = 0.5 * (1.59 * SolidNoise(EPoint) + 0.985);
1806 
1807 		// Clamp final value to 0-1 range
1808 			if (sum < 0.0) sum = 0.0;
1809 			if (sum > 1.0) sum = 1.0;
1810 
1811 		return sum;
1812 	}
1813 
1814 	x = EPoint[X];
1815 	y = EPoint[Y];
1816 	z = EPoint[Z];
1817 
1818 	/* its equivalent integer lattice point. */
1819 	/* ix = (int)x; iy = (int)y; iz = (long)z; */
1820 	/* JB fix for the range problem */
1821 	tmp = (x>=0)?(int)x:(int)(x-(1-EPSILON));
1822 	ix = (int)((tmp-MINX)&0xFFF);
1823 	x_ix = x-tmp;
1824 
1825 	tmp = (y>=0)?(int)y:(int)(y-(1-EPSILON));
1826 	iy = (int)((tmp-MINY)&0xFFF);
1827 	y_iy = y-tmp;
1828 
1829 	tmp = (z>=0)?(int)z:(int)(z-(1-EPSILON));
1830 	iz = (int)((tmp-MINZ)&0xFFF);
1831 	z_iz = z-tmp;
1832 
1833 
1834 	ixiy_hash = Hash2d(ix,     iy);
1835 	jxiy_hash = Hash2d(ix + 1, iy);
1836 	ixjy_hash = Hash2d(ix,     iy + 1);
1837 	jxjy_hash = Hash2d(ix + 1, iy + 1);
1838 
1839 	__m128d three = {3.0,3.0};
1840 	__m128d two = {2.0,2.0};
1841 	__m128d one = {1.0,1.0};
1842 
1843 	__m128d ix_mm = _mm_set1_pd(x_ix);
1844 	__m128d iy_mm = _mm_set1_pd(y_iy);
1845 	__m128d iz_mm = _mm_set1_pd(z_iz);
1846 
1847 	__m128d jx_mm = _mm_sub_pd(ix_mm,one);
1848 	__m128d jy_mm = _mm_sub_pd(iy_mm,one);
1849 	__m128d jz_mm = _mm_sub_pd(iz_mm,one);
1850 
1851 
1852 	__m128d mm_sx = _mm_mul_pd(_mm_mul_pd(ix_mm,ix_mm), _mm_nmacc_pd(two,ix_mm,three));
1853 	__m128d mm_sy =  _mm_mul_pd(_mm_mul_sd(iy_mm,iy_mm),_mm_nmacc_sd(two,iy_mm,three));
1854 	__m128d mm_sz =  _mm_mul_pd(_mm_mul_pd(iz_mm,iz_mm),_mm_nmacc_pd(two,iz_mm,three));
1855 
1856 	mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz)];
1857 	DBL *mp2 = &RTable[Hash1dRTableIndex(ixjy_hash, iz)];
1858 
1859 	__m128d mm_tx = _mm_sub_pd(one,mm_sx);
1860 	__m128d mm_ty = _mm_sub_sd(one,mm_sy);
1861 	__m128d mm_tz = _mm_sub_pd(one,mm_sz);
1862 	__m128d temp_mm = _mm_unpacklo_pd(mm_ty,mm_sy);
1863 	__m128d mm_txty_txsy = _mm_mul_pd(mm_tx,temp_mm);
1864 	__m128d mm_sxty_sxsy = _mm_mul_pd(mm_sx,temp_mm);
1865 
1866 	__m128d mp_t1 = _mm_loadu_pd(mp + 1);
1867 	__m128d mp_t2 = _mm_loadu_pd(mp2 + 1);
1868 
1869 
1870 	__m128d mp4_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 4),_mm_load_sd(mp2 + 4));   // 44
1871 	__m128d mp6_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 6),_mm_load_sd(mp2 + 6));   // 66
1872 
1873 	__m128d mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2);   // 11
1874 	__m128d mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2);   // 22
1875 
1876 	mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz + 1)];
1877 	mp2 = &RTable[Hash1dRTableIndex(ixjy_hash, iz + 1)];
1878 
1879 	__m128d s_mm = _mm_mul_pd(mm_txty_txsy,mm_tz);
1880 	__m128d y_mm = _mm_unpacklo_pd(iy_mm,jy_mm);
1881 
1882 	__m128d int_sumf = _mm_macc_pd(ix_mm,mp2_mm,mp1_mm);
1883 	int_sumf = _mm_macc_pd(y_mm,mp4_mm,int_sumf);
1884 	int_sumf = _mm_macc_pd(iz_mm,mp6_mm,int_sumf);
1885 	int_sumf = _mm_mul_pd(s_mm,int_sumf);
1886 //-2---------------------------------------------------------------
1887 
1888 	 mp_t1 = _mm_loadu_pd(mp + 1);
1889 	 mp_t2 = _mm_loadu_pd(mp2 + 1);
1890 
1891 	 mp4_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 4),_mm_load_sd(mp2 + 4));   // 44
1892 	 mp6_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 6),_mm_load_sd(mp2 + 6));   // 66
1893 
1894 	 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2);   // 11
1895 	 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2);   // 22
1896 
1897 	mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz)];
1898 	mp2 = &RTable[Hash1dRTableIndex(jxjy_hash, iz)];
1899 
1900 	s_mm = _mm_mul_pd(mm_txty_txsy,mm_sz);
1901 
1902 	__m128d int_sum1 = _mm_macc_pd(ix_mm,mp2_mm,mp1_mm);
1903 	int_sum1 = _mm_macc_pd(y_mm,mp4_mm,int_sum1);
1904 	int_sum1 = _mm_macc_pd(jz_mm,mp6_mm,int_sum1);
1905 	int_sum1 = _mm_macc_pd(s_mm,int_sum1,int_sumf);
1906 //-3---------------------------------------------------------------
1907 
1908 	 mp_t1 = _mm_loadu_pd(mp + 1);
1909 	 mp_t2 = _mm_loadu_pd(mp2 + 1);
1910 
1911 	 mp4_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 4),_mm_load_sd(mp2 + 4));   // 44
1912 	 mp6_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 6),_mm_load_sd(mp2 + 6));   // 66
1913 
1914 	 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2);   // 11
1915 	 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2);   // 22
1916 
1917 	mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz + 1)];
1918 	mp2 = &RTable[Hash1dRTableIndex(jxjy_hash, iz + 1)];
1919 
1920 	s_mm = _mm_mul_pd(mm_sxty_sxsy,mm_tz);
1921 
1922 	int_sumf = _mm_macc_pd(jx_mm,mp2_mm,mp1_mm);
1923 	int_sumf = _mm_macc_pd(y_mm,mp4_mm,int_sumf);
1924 	int_sumf = _mm_macc_pd(iz_mm,mp6_mm,int_sumf);
1925 	int_sumf = _mm_macc_pd(s_mm,int_sumf,int_sum1);
1926 //-4---------------------------------------------------------------
1927 	 mp_t1 = _mm_loadu_pd(mp + 1);
1928 	 mp_t2 = _mm_loadu_pd(mp2 + 1);
1929 
1930 	 mp4_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 4),_mm_load_sd(mp2 + 4));   // 44
1931 	 mp6_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 6),_mm_load_sd(mp2 + 6));   // 66
1932 
1933 	 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2);   // 11
1934 	 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2);   // 22
1935 
1936 
1937 	s_mm = _mm_mul_pd(mm_sxty_sxsy,mm_sz);
1938 
1939 	int_sum1 = _mm_macc_pd(jx_mm,mp2_mm,mp1_mm);
1940 	int_sum1 = _mm_macc_pd(y_mm,mp4_mm,int_sum1);
1941 	int_sum1 = _mm_macc_pd(jz_mm,mp6_mm,int_sum1);
1942 	int_sum1 = _mm_macc_pd(s_mm,int_sum1,int_sumf);
1943 	int_sum1 = _mm_add_sd(_mm_unpackhi_pd(int_sum1,int_sum1),int_sum1);
1944 	_mm_store_sd(&sum,int_sum1);
1945 
1946 	if(noise_generator==2)
1947 	{
1948 		/* details of range here:
1949 		Min, max: -1.05242, 0.988997
1950 		Mean: -0.0191481, Median: -0.535493, Std Dev: 0.256828
1951 
1952 		We want to change it to as close to [0,1] as possible.
1953 		*/
1954 		sum += 1.05242;
1955 		sum *= 0.48985582;
1956 		/*sum *= 0.5;
1957 			sum += 0.5;*/
1958 
1959 		if (sum < 0.0)
1960 			sum = 0.0;
1961 		if (sum > 1.0)
1962 			sum = 1.0;
1963 	}
1964 	else
1965 	{
1966 		sum = sum + 0.5;                     /* range at this point -0.5 - 0.5... */
1967 
1968 		if (sum < 0.0)
1969 			sum = 0.0;
1970 		if (sum > 1.0)
1971 			sum = 1.0;
1972 	}
1973 	return (sum);
1974 }
1975 
1976 
1977 
1978 /*****************************************************************************
1979 *
1980 * FUNCTION
1981 *
1982 *   AVX_FMA4_DNoise
1983 *
1984 * INPUT
1985 *
1986 *   EPoint -- 3-D point at which noise is evaluated
1987 *
1988 * OUTPUT
1989 *
1990 *   VECTOR result
1991 *
1992 * RETURNS
1993 *
1994 * AUTHOR
1995 *
1996 *   Robert Skinner based on Ken Perlin
1997 *
1998 * DESCRIPTION
1999 *   Vector-valued version of "Noise"
2000 *
2001 * CHANGES
2002 *   Modified by AAC to ensure uniformly distributed clamped values
2003 *   between 0 and 1.0...
2004 *
2005 *   Feb 8, 2001: modified function based on MegaPov 0.7 to remove
2006 *                bugs that showed up when noise was translated.
2007 *
2008 *    Changes Made by AMD
2009 *    July 28, 2011:Re-wrote OriDNoise function using AVX and FMA4 intrinsic
2010 *
2011 ******************************************************************************/
2012 
AVX_FMA4_DNoise(VECTOR result,const VECTOR EPoint)2013 void AVX_FMA4_DNoise(VECTOR result, const VECTOR EPoint)
2014 {
2015 	DBL x, y, z;
2016 	int ix, iy, iz;
2017 	DBL *mp;
2018 	int tmp;
2019 	int ixiy_hash, ixjy_hash, jxiy_hash, jxjy_hash;
2020 	DBL x_ix,y_iy,z_iz;
2021 
2022 	// TODO FIXME - global statistics reference
2023 	// Stats[Calls_To_DNoise]++;
2024 
2025 	x = EPoint[X];
2026 	y = EPoint[Y];
2027 	z = EPoint[Z];
2028 
2029 	/* its equivalent integer lattice point. */
2030 	/*ix = (int)x; iy = (int)y; iz = (int)z;
2031 	x_ix = x - ix; y_iy = y - iy; z_iz = z - iz;*/
2032 	            /* JB fix for the range problem */
2033 	tmp = (x>=0)?(int)x:(int)(x-(1-EPSILON));
2034 	ix = (int)((tmp-MINX)&0xFFF);
2035 	x_ix = x-tmp;
2036 
2037 	tmp = (y>=0)?(int)y:(int)(y-(1-EPSILON));
2038 	iy = (int)((tmp-MINY)&0xFFF);
2039 	y_iy = y-tmp;
2040 
2041 	tmp = (z>=0)?(int)z:(int)(z-(1-EPSILON));
2042 	iz = (int)((tmp-MINZ)&0xFFF);
2043 	z_iz = z-tmp;
2044 
2045 	ixiy_hash = Hash2d(ix,     iy);
2046 	jxiy_hash = Hash2d(ix + 1, iy);
2047 	ixjy_hash = Hash2d(ix,     iy + 1);
2048 	jxjy_hash = Hash2d(ix + 1, iy + 1);
2049 
2050 	__m128d three = {3.0,3.0};
2051 	__m128d two = {2.0,2.0};
2052 	__m128d one = {1.0,1.0};
2053 
2054 	__m128d ix_mm = _mm_set1_pd(x_ix);
2055 	__m128d iy_mm = _mm_set1_pd(y_iy);
2056 	__m128d iz_mm = _mm_set1_pd(z_iz);
2057 
2058 	__m128d jx_mm = _mm_sub_pd(ix_mm,one);
2059 	__m128d jy_mm = _mm_sub_pd(iy_mm,one);
2060 	__m128d jz_mm = _mm_sub_pd(iz_mm,one);
2061 
2062 	__m128d mm_sx = _mm_mul_pd(_mm_mul_pd(ix_mm,ix_mm), _mm_nmacc_pd(two,ix_mm,three));
2063 	__m128d mm_sy =  _mm_mul_pd(_mm_mul_pd(iy_mm,iy_mm),_mm_nmacc_pd(two,iy_mm,three));
2064 	__m128d mm_sz =  _mm_mul_pd(_mm_mul_pd(iz_mm,iz_mm),_mm_nmacc_pd(two,iz_mm,three));
2065 
2066 	__m128d mm_tx = _mm_sub_pd(one,mm_sx);
2067 	__m128d mm_ty = _mm_sub_pd(one,mm_sy);
2068 	__m128d mm_tz = _mm_sub_pd(one,mm_sz);
2069 
2070 	__m128d mm_txty = _mm_mul_pd(mm_tx,mm_ty);
2071 	__m128d mm_s = _mm_mul_pd(mm_txty,mm_tz);
2072 
2073 	mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz)];
2074 	DBL *mp2 = mp+8;
2075 
2076 	__m128d mp_t1 = _mm_loadu_pd(mp + 1);
2077 	__m128d mp_t2 = _mm_loadu_pd(mp2 + 1);
2078 	__m128d mp_t3 =  _mm_load_sd(mp + 4);
2079 	__m128d mp_t4 =  _mm_load_sd(mp2 + 4);
2080 	__m128d mp_t5 =  _mm_load_sd(mp + 6);
2081 	__m128d mp_t6 =  _mm_load_sd(mp2 + 6);
2082 
2083 	__m128d mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2);   // 11
2084 	__m128d mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2);   // 22
2085 
2086 	__m128d mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4);   // 44
2087 	__m128d mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6);   // 66
2088 
2089 	__m128d sum_X_Y = _mm_macc_pd(ix_mm,mp2_mm,mp1_mm);
2090 	sum_X_Y = _mm_macc_pd(iy_mm,mp4_mm,sum_X_Y);
2091 	sum_X_Y = _mm_macc_pd(iz_mm,mp6_mm,sum_X_Y);
2092 	sum_X_Y = _mm_mul_pd(mm_s,sum_X_Y);
2093 
2094 	mp2 = mp2+8;
2095 	mp1_mm = _mm_load_sd(mp2 + 1);
2096 	mp2_mm = _mm_load_sd(mp2 + 2);
2097 	mp4_mm = _mm_load_sd(mp2 + 4);
2098 	mp6_mm = _mm_load_sd(mp2 + 6);
2099 
2100 
2101 	__m128d sum__Z = _mm_macc_sd(ix_mm,mp2_mm,mp1_mm);
2102 	sum__Z = _mm_macc_sd(iy_mm,mp4_mm,sum__Z);
2103 	sum__Z = _mm_macc_sd(iz_mm,mp6_mm,sum__Z);
2104 	sum__Z = _mm_mul_sd(mm_s,sum__Z);
2105 
2106 //2--------------------------------------------------
2107 
2108 	mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz)];
2109 	mp2 = mp+8;
2110 
2111 	 mp_t1 = _mm_loadu_pd(mp + 1);
2112 	 mp_t2 = _mm_loadu_pd(mp2 + 1);
2113 	 mp_t3 =  _mm_load_sd(mp + 4);
2114 	 mp_t4 =  _mm_load_sd(mp2 + 4);
2115 	 mp_t5 =  _mm_load_sd(mp + 6);
2116 	 mp_t6 =  _mm_load_sd(mp2 + 6);
2117 
2118 	__m128d mm_sxty = _mm_mul_pd(mm_sx,mm_ty);
2119 	mm_s = _mm_mul_pd(mm_sxty,mm_tz);
2120 
2121 
2122 	 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2);   // 11
2123 	 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2);   // 22
2124 
2125 	 mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4);   // 44
2126 	 mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6);   // 66
2127 
2128 	mp2 = mp2+8;
2129 
2130 	__m128d t_sum_X_Y = _mm_macc_pd(jx_mm,mp2_mm,mp1_mm);
2131 	t_sum_X_Y = _mm_macc_pd(iy_mm,mp4_mm,t_sum_X_Y);
2132 	t_sum_X_Y = _mm_macc_pd(iz_mm,mp6_mm,t_sum_X_Y);
2133 	sum_X_Y = _mm_macc_pd(mm_s,t_sum_X_Y,sum_X_Y);
2134 
2135 
2136 	mp1_mm = _mm_load_sd(mp2 + 1);
2137 	mp2_mm = _mm_load_sd(mp2 + 2);
2138 	mp4_mm = _mm_load_sd(mp2 + 4);
2139 	mp6_mm = _mm_load_sd(mp2 + 6);
2140 
2141 	__m128d t_sum__Z = _mm_macc_sd(jx_mm,mp2_mm,mp1_mm);
2142 	t_sum__Z = _mm_macc_sd(iy_mm,mp4_mm,t_sum__Z);
2143 	t_sum__Z = _mm_macc_sd(iz_mm,mp6_mm,t_sum__Z);
2144 	sum__Z = _mm_macc_sd(mm_s,t_sum__Z,sum__Z);
2145 
2146 //3---------------------------------------------------------------
2147 
2148 
2149 
2150 	mp = &RTable[Hash1dRTableIndex(jxjy_hash, iz)];
2151 	mp2 = mp+8;
2152 
2153 	 mp_t1 = _mm_loadu_pd(mp + 1);
2154 	 mp_t2 = _mm_loadu_pd(mp2 + 1);
2155 	 mp_t3 =  _mm_load_sd(mp + 4);
2156 	 mp_t4 =  _mm_load_sd(mp2 + 4);
2157 	 mp_t5 =  _mm_load_sd(mp + 6);
2158 	 mp_t6 =  _mm_load_sd(mp2 + 6);
2159 
2160 	__m128d mm_sxsy = _mm_mul_pd(mm_sx,mm_sy);
2161 	mm_s = _mm_mul_pd(mm_sxsy,mm_tz);
2162 
2163 	 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2);   // 11
2164 	 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2);   // 22
2165 
2166 	 mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4);   // 44
2167 	 mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6);   // 66
2168 
2169 	mp2 = mp2+8;
2170 
2171 	t_sum_X_Y = _mm_macc_pd(jx_mm,mp2_mm,mp1_mm);
2172 	t_sum_X_Y = _mm_macc_pd(jy_mm,mp4_mm,t_sum_X_Y);
2173 	t_sum_X_Y = _mm_macc_pd(iz_mm,mp6_mm,t_sum_X_Y);
2174 	sum_X_Y = _mm_macc_pd(mm_s,t_sum_X_Y,sum_X_Y);
2175 
2176 
2177 	mp1_mm = _mm_load_sd(mp2 + 1);
2178 	mp2_mm = _mm_load_sd(mp2 + 2);
2179 	mp4_mm = _mm_load_sd(mp2 + 4);
2180 	mp6_mm = _mm_load_sd(mp2 + 6);
2181 
2182 	t_sum__Z = _mm_macc_sd(jx_mm,mp2_mm,mp1_mm);
2183 	t_sum__Z = _mm_macc_sd(jy_mm,mp4_mm,t_sum__Z);
2184 	t_sum__Z = _mm_macc_sd(iz_mm,mp6_mm,t_sum__Z);
2185 	sum__Z = _mm_macc_sd(mm_s,t_sum__Z,sum__Z);
2186 
2187 //4-------------------------------------------------------------------
2188 	mp = &RTable[Hash1dRTableIndex(ixjy_hash, iz)];
2189 	mp2 = mp+8;
2190 
2191 	 mp_t1 = _mm_loadu_pd(mp + 1);
2192 	 mp_t2 = _mm_loadu_pd(mp2 + 1);
2193 	 mp_t3 =  _mm_load_sd(mp + 4);
2194 	 mp_t4 =  _mm_load_sd(mp2 + 4);
2195 	 mp_t5 =  _mm_load_sd(mp + 6);
2196 	 mp_t6 =  _mm_load_sd(mp2 + 6);
2197 
2198 	__m128d mm_txsy = _mm_mul_pd(mm_tx,mm_sy);
2199 	mm_s = _mm_mul_pd(mm_txsy,mm_tz);
2200 
2201 	 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2);   // 11
2202 	 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2);   // 22
2203 
2204 	 mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4);   // 44
2205 	 mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6);   // 66
2206 	mp2 = mp2+8;
2207 
2208 	t_sum_X_Y = _mm_macc_pd(ix_mm,mp2_mm,mp1_mm);
2209 	t_sum_X_Y = _mm_macc_pd(jy_mm,mp4_mm,t_sum_X_Y);
2210 	t_sum_X_Y = _mm_macc_pd(iz_mm,mp6_mm,t_sum_X_Y);
2211 	sum_X_Y = _mm_macc_pd(mm_s,t_sum_X_Y,sum_X_Y);
2212 
2213 
2214 	mp1_mm = _mm_load_sd(mp2 + 1);
2215 	mp2_mm = _mm_load_sd(mp2 + 2);
2216 	mp4_mm = _mm_load_sd(mp2 + 4);
2217 	mp6_mm = _mm_load_sd(mp2 + 6);
2218 
2219 	t_sum__Z = _mm_macc_sd(ix_mm,mp2_mm,mp1_mm);
2220 	t_sum__Z = _mm_macc_sd(jy_mm,mp4_mm,t_sum__Z);
2221 	t_sum__Z = _mm_macc_sd(iz_mm,mp6_mm,t_sum__Z);
2222 	sum__Z = _mm_macc_sd(mm_s,t_sum__Z,sum__Z);
2223 //5-----------------------------------------------------
2224 	mp = &RTable[Hash1dRTableIndex(ixjy_hash, iz + 1)];
2225 	mp2 = mp+8;
2226 
2227 	 mp_t1 = _mm_loadu_pd(mp + 1);
2228 	 mp_t2 = _mm_loadu_pd(mp2 + 1);
2229 	 mp_t3 =  _mm_load_sd(mp + 4);
2230 	 mp_t4 =  _mm_load_sd(mp2 + 4);
2231 	 mp_t5 =  _mm_load_sd(mp + 6);
2232 	 mp_t6 =  _mm_load_sd(mp2 + 6);
2233 
2234 	mm_s = _mm_mul_pd(mm_txsy,mm_sz);
2235 
2236 	 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2);   // 11
2237 	 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2);   // 22
2238 
2239 	 mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4);   // 44
2240 	 mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6);   // 66
2241 	mp2 = mp2+8;
2242 
2243 	t_sum_X_Y = _mm_macc_pd(ix_mm,mp2_mm,mp1_mm);
2244 	t_sum_X_Y = _mm_macc_pd(jy_mm,mp4_mm,t_sum_X_Y);
2245 	t_sum_X_Y = _mm_macc_pd(jz_mm,mp6_mm,t_sum_X_Y);
2246 	sum_X_Y = _mm_macc_pd(mm_s,t_sum_X_Y,sum_X_Y);
2247 
2248 
2249 	mp1_mm = _mm_load_sd(mp2 + 1);
2250 	mp2_mm = _mm_load_sd(mp2 + 2);
2251 	mp4_mm = _mm_load_sd(mp2 + 4);
2252 	mp6_mm = _mm_load_sd(mp2 + 6);
2253 
2254 	t_sum__Z = _mm_macc_sd(ix_mm,mp2_mm,mp1_mm);
2255 	t_sum__Z = _mm_macc_sd(jy_mm,mp4_mm,t_sum__Z);
2256 	t_sum__Z = _mm_macc_sd(jz_mm,mp6_mm,t_sum__Z);
2257 	sum__Z = _mm_macc_sd(mm_s,t_sum__Z,sum__Z);
2258 
2259 //6------------------
2260 	mp = &RTable[Hash1dRTableIndex(jxjy_hash, iz + 1)];
2261 	mp2 = mp+8;
2262 
2263 	 mp_t1 = _mm_loadu_pd(mp + 1);
2264 	 mp_t2 = _mm_loadu_pd(mp2 + 1);
2265 	 mp_t3 =  _mm_load_sd(mp + 4);
2266 	 mp_t4 =  _mm_load_sd(mp2 + 4);
2267 	 mp_t5 =  _mm_load_sd(mp + 6);
2268 	 mp_t6 =  _mm_load_sd(mp2 + 6);
2269 
2270 	mm_s = _mm_mul_pd(mm_sxsy,mm_sz);
2271 
2272 	 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2);   // 11
2273 	 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2);   // 22
2274 
2275 	 mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4);   // 44
2276 	 mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6);   // 66
2277 
2278 	 mp2 = mp2+8;
2279 
2280 	t_sum_X_Y = _mm_macc_pd(jx_mm,mp2_mm,mp1_mm);
2281 	t_sum_X_Y = _mm_macc_pd(jy_mm,mp4_mm,t_sum_X_Y);
2282 	t_sum_X_Y = _mm_macc_pd(jz_mm,mp6_mm,t_sum_X_Y);
2283 	sum_X_Y = _mm_macc_pd(mm_s,t_sum_X_Y,sum_X_Y);
2284 
2285 
2286 	mp1_mm = _mm_load_sd(mp2 + 1);
2287 	mp2_mm = _mm_load_sd(mp2 + 2);
2288 	mp4_mm = _mm_load_sd(mp2 + 4);
2289 	mp6_mm = _mm_load_sd(mp2 + 6);
2290 
2291 	t_sum__Z = _mm_macc_sd(jx_mm,mp2_mm,mp1_mm);
2292 	t_sum__Z = _mm_macc_sd(jy_mm,mp4_mm,t_sum__Z);
2293 	t_sum__Z = _mm_macc_sd(jz_mm,mp6_mm,t_sum__Z);
2294 	sum__Z = _mm_macc_sd(mm_s,t_sum__Z,sum__Z);
2295 
2296 //7-----------
2297 	mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz + 1)];
2298 	mp2 = mp+8;
2299 
2300 	 mp_t1 = _mm_loadu_pd(mp + 1);
2301 	 mp_t2 = _mm_loadu_pd(mp2 + 1);
2302 	 mp_t3 =  _mm_load_sd(mp + 4);
2303 	 mp_t4 =  _mm_load_sd(mp2 + 4);
2304 	 mp_t5 =  _mm_load_sd(mp + 6);
2305 	 mp_t6 =  _mm_load_sd(mp2 + 6);
2306 
2307 	mm_s = _mm_mul_pd(mm_sxty,mm_sz);
2308 
2309 	 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2);   // 11
2310 	 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2);   // 22
2311 
2312 	 mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4);   // 44
2313 	 mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6);   // 66
2314 	 mp2 = mp2+8;
2315 	t_sum_X_Y = _mm_macc_pd(jx_mm,mp2_mm,mp1_mm);
2316 	t_sum_X_Y = _mm_macc_pd(iy_mm,mp4_mm,t_sum_X_Y);
2317 	t_sum_X_Y = _mm_macc_pd(jz_mm,mp6_mm,t_sum_X_Y);
2318 	sum_X_Y = _mm_macc_pd(mm_s,t_sum_X_Y,sum_X_Y);
2319 
2320 
2321 	mp1_mm = _mm_load_sd(mp2 + 1);
2322 	mp2_mm = _mm_load_sd(mp2 + 2);
2323 	mp4_mm = _mm_load_sd(mp2 + 4);
2324 	mp6_mm = _mm_load_sd(mp2 + 6);
2325 
2326 	t_sum__Z = _mm_macc_sd(jx_mm,mp2_mm,mp1_mm);
2327 	t_sum__Z = _mm_macc_sd(iy_mm,mp4_mm,t_sum__Z);
2328 	t_sum__Z = _mm_macc_sd(jz_mm,mp6_mm,t_sum__Z);
2329 	sum__Z = _mm_macc_sd(mm_s,t_sum__Z,sum__Z);
2330 
2331 //8--------------
2332 
2333 
2334 	mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz + 1)];
2335 	mp2 = mp+8;
2336 
2337 	 mp_t1 = _mm_loadu_pd(mp + 1);
2338 	 mp_t2 = _mm_loadu_pd(mp2 + 1);
2339 	 mp_t3 =  _mm_load_sd(mp + 4);
2340 	 mp_t4 =  _mm_load_sd(mp2 + 4);
2341 	 mp_t5 =  _mm_load_sd(mp + 6);
2342 	 mp_t6 =  _mm_load_sd(mp2 + 6);
2343 
2344 	mm_s = _mm_mul_pd(mm_txty,mm_sz);
2345 
2346 	 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2);   // 11
2347 	 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2);   // 22
2348 
2349 	 mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4);   // 44
2350 	 mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6);   // 66
2351 
2352 	mp2 = mp2+8;
2353 
2354 	t_sum_X_Y = _mm_macc_pd(ix_mm,mp2_mm,mp1_mm);
2355 	t_sum_X_Y = _mm_macc_pd(iy_mm,mp4_mm,t_sum_X_Y);
2356 	t_sum_X_Y = _mm_macc_pd(jz_mm,mp6_mm,t_sum_X_Y);
2357 	sum_X_Y = _mm_macc_pd(mm_s,t_sum_X_Y,sum_X_Y);
2358 
2359 
2360 	mp1_mm = _mm_load_sd(mp2 + 1);
2361 	mp2_mm = _mm_load_sd(mp2 + 2);
2362 	mp4_mm = _mm_load_sd(mp2 + 4);
2363 	mp6_mm = _mm_load_sd(mp2 + 6);
2364 
2365 	t_sum__Z = _mm_macc_sd(ix_mm,mp2_mm,mp1_mm);
2366 	t_sum__Z = _mm_macc_sd(iy_mm,mp4_mm,t_sum__Z);
2367 	t_sum__Z = _mm_macc_sd(jz_mm,mp6_mm,t_sum__Z);
2368 	sum__Z = _mm_macc_sd(mm_s,t_sum__Z,sum__Z);
2369 
2370 	_mm_storeu_pd(result,sum_X_Y);
2371 
2372 	_mm_store_sd(&result[Z],sum__Z);
2373 
2374 }
2375 
2376 #endif
2377 
2378 
2379 }
2380