1 /*******************************************************************************
2 * texture.cpp
3 *
4 * This module implements texturing functions such as noise, turbulence and
5 * texture transformation functions. The actual texture routines are in the
6 * files pigment.c & normal.c.
7 * The noise function used here is the one described by Ken Perlin in
8 * "Hypertexture", SIGGRAPH '89 Conference Proceedings page 253.
9 *
10 * ---------------------------------------------------------------------------
11 * Persistence of Vision Ray Tracer ('POV-Ray') version 3.7.
12 * Copyright 1991-2013 Persistence of Vision Raytracer Pty. Ltd.
13 *
14 * POV-Ray is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Affero General Public License as
16 * published by the Free Software Foundation, either version 3 of the
17 * License, or (at your option) any later version.
18 *
19 * POV-Ray is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Affero General Public License for more details.
23 *
24 * You should have received a copy of the GNU Affero General Public License
25 * along with this program. If not, see <http://www.gnu.org/licenses/>.
26 * ---------------------------------------------------------------------------
27 * POV-Ray is based on the popular DKB raytracer version 2.12.
28 * DKBTrace was originally written by David K. Buck.
29 * DKBTrace Ver 2.0-2.12 were written by David K. Buck & Aaron A. Collins.
30 * ---------------------------------------------------------------------------
31 * $File: //depot/public/povray/3.x/source/backend/texture/texture.cpp $
32 * $Revision: #1 $
33 * $Change: 6069 $
34 * $DateTime: 2013/11/06 11:59:40 $
35 * $Author: chrisc $
36 *******************************************************************************/
37
38 /*
39 Some texture ideas garnered from SIGGRAPH '85 Volume 19 Number 3,
40 "An Image Synthesizer" By Ken Perlin.
41 Further Ideas Garnered from "The RenderMan Companion" (Addison Wesley)
42 */
43
44 // frame.h must always be the first POV file included (pulls in platform config)
45 #include "backend/frame.h"
46 #include "backend/texture/texture.h"
47 #include "backend/texture/pigment.h"
48 #include "backend/texture/normal.h"
49 #include "backend/support/imageutil.h"
50 #include "backend/math/vector.h"
51 #include "base/pov_err.h"
52
53 #if defined(USE_AVX_FMA4_FOR_NOISE)
54 #include "avxfma4check.h"
55 #endif
56
57 #include <algorithm>
58
59 // this must be the last file included
60 #include "base/povdebug.h"
61
62 namespace pov
63 {
64
65 // TODO FIXME GLOBAL VARIABLE
66 static unsigned int next_rand = 1;
67
68 /*****************************************************************************
69 * Static functions
70 ******************************************************************************/
71
72 static void InitTextureTable (void);
73 static TEXTURE *Copy_Materials (TEXTURE *Old);
74 static void InitSolidNoise(void);
75 static DBL SolidNoise(const VECTOR P);
76
77 /*****************************************************************************
78 * Local preprocessor defines
79 ******************************************************************************/
80
81 /* Ridiculously large scaling values */
82
83 const int MINX = -10000;
84 const int MINY = MINX;
85 const int MINZ = MINX;
86
87 const int SINTABSIZE = 1000;
88
89 #define SCURVE(a) ((a)*(a)*(3.0-2.0*(a)))
90
91 // Hash2d assumed values in the range 0..8191
92 #define Hash2d(a,b) \
93 hashTable[(int)(hashTable[(int)(a)] ^ (b))]
94
95 // Hash1dRTableIndex assumed values in the range 0..8191
96 #define Hash1dRTableIndex(a,b) \
97 ((hashTable[(int)(a) ^ (b)] & 0xFF) * 2)
98
99 #define INCRSUM(m,s,x,y,z) \
100 ((s)*(RTable[m+1] + RTable[m+2]*(x) + RTable[m+4]*(y) + RTable[m+6]*(z)))
101
102 #define INCRSUMP(mp,s,x,y,z) \
103 ((s)*((mp[1]) + (mp[2])*(x) + (mp[4])*(y) + (mp[6])*(z)))
104
105
106 /*****************************************************************************
107 * Local typedefs
108 ******************************************************************************/
109
110
111
112 /*****************************************************************************
113 * Local variables
114 ******************************************************************************/
115
116 static DBL *sintab; // GLOBAL VARIABLE
117
118 #ifdef DYNAMIC_HASHTABLE
119 unsigned short *hashTable; // GLOBAL VARIABLE
120 #else
121 ALIGN16 unsigned short hashTable[8192]; // GLOBAL VARIABLE
122 #endif
123
124 /*****************************************************************************
125 * Local variables
126 ******************************************************************************/
127
128 ALIGN16 DBL RTable[267*2] =
129 {
130 -1, 0.0, 0.604974, 0.0, -0.937102, 0.0, 0.414115, 0.0, 0.576226, 0.0, -0.0161593, 0.0,
131 0.432334, 0.0, 0.103685, 0.0, 0.590539, 0.0, 0.0286412, 0.0, 0.46981, 0.0, -0.84622, 0.0,
132 -0.0734112, 0.0, -0.304097, 0.0, -0.40206, 0.0, -0.210132, 0.0, -0.919127, 0.0, 0.652033, 0.0,
133 -0.83151, 0.0, -0.183948, 0.0, -0.671107, 0.0, 0.852476, 0.0, 0.043595, 0.0, -0.404532, 0.0,
134 0.75494, 0.0, -0.335653, 0.0, 0.618433, 0.0, 0.605707, 0.0, 0.708583, 0.0, -0.477195, 0.0,
135 0.899474, 0.0, 0.490623, 0.0, 0.221729, 0.0, -0.400381, 0.0, -0.853727, 0.0, -0.932586, 0.0,
136 0.659113, 0.0, 0.961303, 0.0, 0.325948, 0.0, -0.750851, 0.0, 0.842466, 0.0, 0.734401, 0.0,
137 -0.649866, 0.0, 0.394491, 0.0, -0.466056, 0.0, -0.434073, 0.0, 0.109026, 0.0, 0.0847028, 0.0,
138 -0.738857, 0.0, 0.241505, 0.0, 0.16228, 0.0, -0.71426, 0.0, -0.883665, 0.0, -0.150408, 0.0,
139 -0.90396, 0.0, -0.686549, 0.0, -0.785214, 0.0, 0.488548, 0.0, 0.0246433, 0.0, 0.142473, 0.0,
140 -0.602136, 0.0, 0.375845, 0.0, -0.00779736, 0.0, 0.498955, 0.0, -0.268147, 0.0, 0.856382, 0.0,
141 -0.386007, 0.0, -0.596094, 0.0, -0.867735, 0.0, -0.570977, 0.0, -0.914366, 0.0, 0.28896, 0.0,
142 0.672206, 0.0, -0.233783, 0.0, 0.94815, 0.0, 0.895262, 0.0, 0.343252, 0.0, -0.173388, 0.0,
143 -0.767971, 0.0, -0.314748, 0.0, 0.824308, 0.0, -0.342092, 0.0, 0.721431, 0.0, -0.24004, 0.0,
144 -0.63653, 0.0, 0.553277, 0.0, 0.376272, 0.0, 0.158984, 0.0, -0.452659, 0.0, 0.396323, 0.0,
145 -0.420676, 0.0, -0.454154, 0.0, 0.122179, 0.0, 0.295857, 0.0, 0.0664225, 0.0, -0.202075, 0.0,
146 -0.724788, 0.0, 0.453513, 0.0, 0.224567, 0.0, -0.908812, 0.0, 0.176349, 0.0, -0.320516, 0.0,
147 -0.697139, 0.0, 0.742702, 0.0, -0.900786, 0.0, 0.471489, 0.0, -0.133532, 0.0, 0.119127, 0.0,
148 -0.889769, 0.0, -0.23183, 0.0, -0.669673, 0.0, -0.046891, 0.0, -0.803433, 0.0, -0.966735, 0.0,
149 0.475578, 0.0, -0.652644, 0.0, 0.0112459, 0.0, -0.730007, 0.0, 0.128283, 0.0, 0.145647, 0.0,
150 -0.619318, 0.0, 0.272023, 0.0, 0.392966, 0.0, 0.646418, 0.0, -0.0207675, 0.0, -0.315908, 0.0,
151 0.480797, 0.0, 0.535668, 0.0, -0.250172, 0.0, -0.83093, 0.0, -0.653773, 0.0, -0.443809, 0.0,
152 0.119982, 0.0, -0.897642, 0.0, 0.89453, 0.0, 0.165789, 0.0, 0.633875, 0.0, -0.886839, 0.0,
153 0.930877, 0.0, -0.537194, 0.0, 0.587732, 0.0, 0.722011, 0.0, -0.209461, 0.0, -0.0424659, 0.0,
154 -0.814267, 0.0, -0.919432, 0.0, 0.280262, 0.0, -0.66302, 0.0, -0.558099, 0.0, -0.537469, 0.0,
155 -0.598779, 0.0, 0.929656, 0.0, -0.170794, 0.0, -0.537163, 0.0, 0.312581, 0.0, 0.959442, 0.0,
156 0.722652, 0.0, 0.499931, 0.0, 0.175616, 0.0, -0.534874, 0.0, -0.685115, 0.0, 0.444999, 0.0,
157 0.17171, 0.0, 0.108202, 0.0, -0.768704, 0.0, -0.463828, 0.0, 0.254231, 0.0, 0.546014, 0.0,
158 0.869474, 0.0, 0.875212, 0.0, -0.944427, 0.0, 0.130724, 0.0, -0.110185, 0.0, 0.312184, 0.0,
159 -0.33138, 0.0, -0.629206, 0.0, 0.0606546, 0.0, 0.722866, 0.0, -0.0979477, 0.0, 0.821561, 0.0,
160 0.0931258, 0.0, -0.972808, 0.0, 0.0318151, 0.0, -0.867033, 0.0, -0.387228, 0.0, 0.280995, 0.0,
161 -0.218189, 0.0, -0.539178, 0.0, -0.427359, 0.0, -0.602075, 0.0, 0.311971, 0.0, 0.277974, 0.0,
162 0.773159, 0.0, 0.592493, 0.0, -0.0331884, 0.0, -0.630854, 0.0, -0.269947, 0.0, 0.339132, 0.0,
163 0.581079, 0.0, 0.209461, 0.0, -0.317433, 0.0, -0.284993, 0.0, 0.181323, 0.0, 0.341634, 0.0,
164 0.804959, 0.0, -0.229572, 0.0, -0.758907, 0.0, -0.336721, 0.0, 0.605463, 0.0, -0.991272, 0.0,
165 -0.0188754, 0.0, -0.300191, 0.0, 0.368307, 0.0, -0.176135, 0.0, -0.3832, 0.0, -0.749569, 0.0,
166 0.62356, 0.0, -0.573938, 0.0, 0.278309, 0.0, -0.971313, 0.0, 0.839994, 0.0, -0.830686, 0.0,
167 0.439078, 0.0, 0.66128, 0.0, 0.694514, 0.0, 0.0565042, 0.0, 0.54342, 0.0, -0.438804, 0.0,
168 -0.0228428, 0.0, -0.687068, 0.0, 0.857267, 0.0, 0.301991, 0.0, -0.494255, 0.0, -0.941039, 0.0,
169 0.775509, 0.0, 0.410575, 0.0, -0.362081, 0.0, -0.671534, 0.0, -0.348379, 0.0, 0.932433, 0.0,
170 0.886442, 0.0, 0.868681, 0.0, -0.225666, 0.0, -0.062211, 0.0, -0.0976425, 0.0, -0.641444, 0.0,
171 -0.848112, 0.0, 0.724697, 0.0, 0.473503, 0.0, 0.998749, 0.0, 0.174701, 0.0, 0.559625, 0.0,
172 -0.029099, 0.0, -0.337392, 0.0, -0.958129, 0.0, -0.659785, 0.0, 0.236042, 0.0, -0.246937, 0.0,
173 0.659449, 0.0, -0.027512, 0.0, 0.821897, 0.0, -0.226215, 0.0, 0.0181735, 0.0, 0.500481, 0.0,
174 -0.420127, 0.0, -0.427878, 0.0, 0.566186, 0.0
175 };
176
177 /*****************************************************************************/
178 /* Platform specific faster noise functions support */
179 /* (Profiling revealed that the noise functions can take up to 50% of */
180 /* all the time required when rendering and current compilers cannot */
181 /* easily optimise them efficiently without some help from programmers!) */
182 /*****************************************************************************/
183
184 #if USE_FASTER_NOISE
185 }
186 #include "fasternoise.h"
187 namespace pov
188 {
189 #ifndef FASTER_NOISE_INIT
190 #define FASTER_NOISE_INIT()
191 #endif
192 #else
193 #if !defined(USE_AVX_FMA4_FOR_NOISE)
194 #define OriNoise Noise
195 #define OriDNoise DNoise
196 #endif
197
198 #define FASTER_NOISE_INIT()
199 #endif
200
201 /*****************************************************************************
202 *
203 * FUNCTION
204 *
205 * Initialize_Noise()
206 *
207 * INPUT
208 *
209 * OUTPUT
210 *
211 * RETURNS
212 *
213 * AUTHOR
214 *
215 * POV-Ray Team
216 *
217 * DESCRIPTION
218 *
219 * CHANGES
220 *
221 ******************************************************************************/
222
Initialize_Noise()223 void Initialize_Noise()
224 {
225 #if defined(USE_AVX_FMA4_FOR_NOISE)
226 Initialise_NoiseDispatch();
227 #endif
228 InitTextureTable();
229
230 /* are - initialize Perlin style noise function */
231 InitSolidNoise();
232
233 sintab = reinterpret_cast<DBL *>(POV_MALLOC(SINTABSIZE * sizeof(DBL), "sine table"));
234
235 for(int i = 0 ; i < 267 ; i++)
236 RTable[(i * 2) + 1] = RTable[i * 2] * 0.5;
237 // Debug_Info("%.10f %.10f\n", (DBL)(RTable[i * 2] - (DBL)((float)(RTable[i * 2]))), (DBL)(RTable[(i * 2) + 1] - (DBL)((float)(RTable[(i * 2) + 1]))));
238
239 for(int i = 0 ; i < SINTABSIZE ; i++)
240 sintab[i] = sin((DBL)i / SINTABSIZE * TWO_M_PI);
241 }
242
Initialize_Waves(vector<double> & waveFrequencies,vector<Vector3d> & waveSources,unsigned int numberOfWaves)243 void Initialize_Waves(vector<double>& waveFrequencies, vector<Vector3d>& waveSources, unsigned int numberOfWaves)
244 {
245 Vector3d point;
246
247 waveFrequencies.clear();
248 waveSources.clear();
249
250 for(int i = 0, next_rand = -560851967; i < numberOfWaves ; i++)
251 {
252 point = Vector3d((double)i,0.0,0.0);
253 DNoise(*point, *point);
254 waveSources.push_back(point.normalized());
255
256 next_rand = next_rand * 1812433253L + 12345L;
257 waveFrequencies.push_back((double((int)(next_rand >> 16) & 0x7FFF) * 0.000030518509476) + 0.01);
258 }
259 }
260
261
262
263 /*****************************************************************************
264 *
265 * FUNCTION
266 *
267 * InitTextureTable()
268 *
269 * INPUT
270 *
271 * OUTPUT
272 *
273 * RETURNS
274 *
275 * AUTHOR
276 *
277 * POV-Ray Team
278 *
279 * DESCRIPTION
280 *
281 * CHANGES
282 *
283 ******************************************************************************/
284
InitTextureTable()285 static void InitTextureTable()
286 {
287 unsigned short j, temp;
288 int i;
289 int next_rand = 0;
290
291 #ifdef DYNAMIC_HASHTABLE
292 hashTable = reinterpret_cast<unsigned short *>(POV_MALLOC(8192*sizeof(unsigned short), "hash table"));
293 #endif
294
295 for(i = 0; i < 4096; i++)
296 hashTable[i] = i;
297
298 for(i = 4095; i >= 0; i--)
299 {
300 next_rand = next_rand * 1812433253L + 12345L;
301 j = ((int)(next_rand >> 16) & 0x7FFF) % 4096;
302 temp = hashTable[i];
303 hashTable[i] = hashTable[j];
304 hashTable[j] = temp;
305 }
306
307 for(i = 0; i < 4096; i++)
308 hashTable[4096 + i] = hashTable[i];
309
310 FASTER_NOISE_INIT();
311 }
312
313
314
315 /*****************************************************************************
316 *
317 * FUNCTION
318 *
319 * Free_Noise_Tables()
320 *
321 * INPUT
322 *
323 * OUTPUT
324 *
325 * RETURNS
326 *
327 * AUTHOR
328 *
329 * POV-Ray Team
330 *
331 * DESCRIPTION
332 *
333 * CHANGES
334 *
335 ******************************************************************************/
336
Free_Noise_Tables()337 void Free_Noise_Tables()
338 {
339 if (sintab != NULL)
340 {
341 POV_FREE(sintab);
342
343 sintab = NULL;
344
345 #ifdef DYNAMIC_HASHTABLE
346 POV_FREE(hashTable);
347 hashTable = NULL;
348 #endif
349 }
350 }
351
352
353
354 /*****************************************************************************
355 *
356 * FUNCTION
357 *
358 * Noise
359 *
360 * INPUT
361 *
362 * EPoint -- 3-D point at which noise is evaluated
363 *
364 * OUTPUT
365 *
366 * RETURNS
367 *
368 * DBL noise value
369 *
370 * AUTHOR
371 *
372 * Robert Skinner based on Ken Perlin
373 *
374 * DESCRIPTION
375 *
376 * CHANGES
377 * Modified by AAC to ensure uniformly distributed clamped values
378 * between 0 and 1.0...
379 *
380 * Feb 8, 2001: modified function based on MegaPov 0.7 to remove
381 * bugs that showed up when noise was translated.
382 *
383 ******************************************************************************/
384
OriNoise(const VECTOR EPoint,int noise_generator)385 DBL OriNoise(const VECTOR EPoint, int noise_generator)
386 {
387 DBL x, y, z;
388 DBL *mp;
389 int tmp;
390 int ix, iy, iz;
391 int ixiy_hash, ixjy_hash, jxiy_hash, jxjy_hash;
392
393 DBL sx, sy, sz, tx, ty, tz;
394 DBL sum = 0.0;
395
396 DBL x_ix, x_jx, y_iy, y_jy, z_iz, z_jz, txty, sxty, txsy, sxsy;
397
398 // TODO FIXME - global statistics reference
399 // Stats[Calls_To_Noise]++;
400
401 if (noise_generator==3)
402 {
403 // The 1.59 and 0.985 are to correct for some biasing problems with
404 // the random # generator used to create the noise tables. Final
405 // range of values is about 5.0e-4 below 0.0 and above 1.0. Mean
406 // value is 0.49 (ideally it would be 0.5).
407 sum = 0.5 * (1.59 * SolidNoise(EPoint) + 0.985);
408
409 // Clamp final value to 0-1 range
410 if (sum < 0.0) sum = 0.0;
411 if (sum > 1.0) sum = 1.0;
412
413 return sum;
414 }
415
416 x = EPoint[X];
417 y = EPoint[Y];
418 z = EPoint[Z];
419
420 /* its equivalent integer lattice point. */
421 /* ix = (int)x; iy = (int)y; iz = (long)z; */
422 /* JB fix for the range problem */
423 tmp = (x>=0)?(int)x:(int)(x-(1-EPSILON));
424 ix = (int)((tmp-MINX)&0xFFF);
425 x_ix = x-tmp;
426
427 tmp = (y>=0)?(int)y:(int)(y-(1-EPSILON));
428 iy = (int)((tmp-MINY)&0xFFF);
429 y_iy = y-tmp;
430
431 tmp = (z>=0)?(int)z:(int)(z-(1-EPSILON));
432 iz = (int)((tmp-MINZ)&0xFFF);
433 z_iz = z-tmp;
434
435 x_jx = x_ix-1; y_jy = y_iy-1; z_jz = z_iz-1;
436
437 sx = SCURVE(x_ix); sy = SCURVE(y_iy); sz = SCURVE(z_iz);
438
439 /* the complement values of sx,sy,sz */
440 tx = 1 - sx; ty = 1 - sy; tz = 1 - sz;
441
442 /*
443 * interpolate!
444 */
445 txty = tx * ty;
446 sxty = sx * ty;
447 txsy = tx * sy;
448 sxsy = sx * sy;
449 ixiy_hash = Hash2d(ix, iy);
450 jxiy_hash = Hash2d(ix + 1, iy);
451 ixjy_hash = Hash2d(ix, iy + 1);
452 jxjy_hash = Hash2d(ix + 1, iy + 1);
453
454 mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz)];
455 sum = INCRSUMP(mp, (txty*tz), x_ix, y_iy, z_iz);
456
457 mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz)];
458 sum += INCRSUMP(mp, (sxty*tz), x_jx, y_iy, z_iz);
459
460 mp = &RTable[Hash1dRTableIndex(ixjy_hash, iz)];
461 sum += INCRSUMP(mp, (txsy*tz), x_ix, y_jy, z_iz);
462
463 mp = &RTable[Hash1dRTableIndex(jxjy_hash, iz)];
464 sum += INCRSUMP(mp, (sxsy*tz), x_jx, y_jy, z_iz);
465
466 mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz + 1)];
467 sum += INCRSUMP(mp, (txty*sz), x_ix, y_iy, z_jz);
468
469 mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz + 1)];
470 sum += INCRSUMP(mp, (sxty*sz), x_jx, y_iy, z_jz);
471
472 mp = &RTable[Hash1dRTableIndex(ixjy_hash, iz + 1)];
473 sum += INCRSUMP(mp, (txsy*sz), x_ix, y_jy, z_jz);
474
475 mp = &RTable[Hash1dRTableIndex(jxjy_hash, iz + 1)];
476 sum += INCRSUMP(mp, (sxsy*sz), x_jx, y_jy, z_jz);
477
478 if(noise_generator==2)
479 {
480 /* details of range here:
481 Min, max: -1.05242, 0.988997
482 Mean: -0.0191481, Median: -0.535493, Std Dev: 0.256828
483
484 We want to change it to as close to [0,1] as possible.
485 */
486 sum += 1.05242;
487 sum *= 0.48985582;
488 /*sum *= 0.5;
489 sum += 0.5;*/
490
491 if (sum < 0.0)
492 sum = 0.0;
493 if (sum > 1.0)
494 sum = 1.0;
495 }
496 else
497 {
498 sum = sum + 0.5; /* range at this point -0.5 - 0.5... */
499
500 if (sum < 0.0)
501 sum = 0.0;
502 if (sum > 1.0)
503 sum = 1.0;
504 }
505
506 return (sum);
507 }
508
509
510
511 /*****************************************************************************
512 *
513 * FUNCTION
514 *
515 * DNoise
516 *
517 * INPUT
518 *
519 * EPoint -- 3-D point at which noise is evaluated
520 *
521 * OUTPUT
522 *
523 * VECTOR result
524 *
525 * RETURNS
526 *
527 * AUTHOR
528 *
529 * Robert Skinner based on Ken Perlin
530 *
531 * DESCRIPTION
532 * Vector-valued version of "Noise"
533 *
534 * CHANGES
535 * Modified by AAC to ensure uniformly distributed clamped values
536 * between 0 and 1.0...
537 *
538 * Feb 8, 2001: modified function based on MegaPov 0.7 to remove
539 * bugs that showed up when noise was translated.
540 *
541 ******************************************************************************/
542
OriDNoise(VECTOR result,const VECTOR EPoint)543 void OriDNoise(VECTOR result, const VECTOR EPoint)
544 {
545 DBL x, y, z;
546 DBL *mp;
547 int tmp;
548 int ix, iy, iz;
549 int ixiy_hash, ixjy_hash, jxiy_hash, jxjy_hash;
550 DBL x_ix, x_jx, y_iy, y_jy, z_iz, z_jz;
551 DBL s;
552 DBL sx, sy, sz, tx, ty, tz;
553 DBL txty, sxty, txsy, sxsy;
554
555 // TODO FIXME - global statistics reference
556 // Stats[Calls_To_DNoise]++;
557
558 x = EPoint[X];
559 y = EPoint[Y];
560 z = EPoint[Z];
561
562 /* its equivalent integer lattice point. */
563 /*ix = (int)x; iy = (int)y; iz = (int)z;
564 x_ix = x - ix; y_iy = y - iy; z_iz = z - iz;*/
565 /* JB fix for the range problem */
566 tmp = (x>=0)?(int)x:(int)(x-(1-EPSILON));
567 ix = (int)((tmp-MINX)&0xFFF);
568 x_ix = x-tmp;
569
570 tmp = (y>=0)?(int)y:(int)(y-(1-EPSILON));
571 iy = (int)((tmp-MINY)&0xFFF);
572 y_iy = y-tmp;
573
574 tmp = (z>=0)?(int)z:(int)(z-(1-EPSILON));
575 iz = (int)((tmp-MINZ)&0xFFF);
576 z_iz = z-tmp;
577
578 x_jx = x_ix-1; y_jy = y_iy-1; z_jz = z_iz-1;
579
580 sx = SCURVE(x_ix); sy = SCURVE(y_iy); sz = SCURVE(z_iz);
581
582 /* the complement values of sx,sy,sz */
583 tx = 1 - sx; ty = 1 - sy; tz = 1 - sz;
584
585 /*
586 * interpolate!
587 */
588 txty = tx * ty;
589 sxty = sx * ty;
590 txsy = tx * sy;
591 sxsy = sx * sy;
592 ixiy_hash = Hash2d(ix, iy);
593 jxiy_hash = Hash2d(ix + 1, iy);
594 ixjy_hash = Hash2d(ix, iy + 1);
595 jxjy_hash = Hash2d(ix + 1, iy + 1);
596
597 mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz)];
598 s = txty*tz;
599 result[X] = INCRSUMP(mp, s, x_ix, y_iy, z_iz);
600 mp += 8;
601 result[Y] = INCRSUMP(mp, s, x_ix, y_iy, z_iz);
602 mp += 8;
603 result[Z] = INCRSUMP(mp, s, x_ix, y_iy, z_iz);
604
605 mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz)];
606 s = sxty*tz;
607 result[X] += INCRSUMP(mp, s, x_jx, y_iy, z_iz);
608 mp += 8;
609 result[Y] += INCRSUMP(mp, s, x_jx, y_iy, z_iz);
610 mp += 8;
611 result[Z] += INCRSUMP(mp, s, x_jx, y_iy, z_iz);
612
613 mp = &RTable[Hash1dRTableIndex(jxjy_hash, iz)];
614 s = sxsy*tz;
615 result[X] += INCRSUMP(mp, s, x_jx, y_jy, z_iz);
616 mp += 8;
617 result[Y] += INCRSUMP(mp, s, x_jx, y_jy, z_iz);
618 mp += 8;
619 result[Z] += INCRSUMP(mp, s, x_jx, y_jy, z_iz);
620
621 mp = &RTable[Hash1dRTableIndex(ixjy_hash, iz)];
622 s = txsy*tz;
623 result[X] += INCRSUMP(mp, s, x_ix, y_jy, z_iz);
624 mp += 8;
625 result[Y] += INCRSUMP(mp, s, x_ix, y_jy, z_iz);
626 mp += 8;
627 result[Z] += INCRSUMP(mp, s, x_ix, y_jy, z_iz);
628
629 mp = &RTable[Hash1dRTableIndex(ixjy_hash, iz + 1)];
630 s = txsy*sz;
631 result[X] += INCRSUMP(mp, s, x_ix, y_jy, z_jz);
632 mp += 8;
633 result[Y] += INCRSUMP(mp, s, x_ix, y_jy, z_jz);
634 mp += 8;
635 result[Z] += INCRSUMP(mp, s, x_ix, y_jy, z_jz);
636
637 mp = &RTable[Hash1dRTableIndex(jxjy_hash, iz + 1)];
638 s = sxsy*sz;
639 result[X] += INCRSUMP(mp, s, x_jx, y_jy, z_jz);
640 mp += 8;
641 result[Y] += INCRSUMP(mp, s, x_jx, y_jy, z_jz);
642 mp += 8;
643 result[Z] += INCRSUMP(mp, s, x_jx, y_jy, z_jz);
644
645 mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz + 1)];
646 s = sxty*sz;
647 result[X] += INCRSUMP(mp, s, x_jx, y_iy, z_jz);
648 mp += 8;
649 result[Y] += INCRSUMP(mp, s, x_jx, y_iy, z_jz);
650 mp += 8;
651 result[Z] += INCRSUMP(mp, s, x_jx, y_iy, z_jz);
652
653 mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz + 1)];
654 s = txty*sz;
655 result[X] += INCRSUMP(mp, s, x_ix, y_iy, z_jz);
656 mp += 8;
657 result[Y] += INCRSUMP(mp, s, x_ix, y_iy, z_jz);
658 mp += 8;
659 result[Z] += INCRSUMP(mp, s, x_ix, y_iy, z_jz);
660 }
661
662 // Note that the value of NoiseEntries must be a power of 2. This
663 // is because bit masking using (NoiseEntries-1) is used to rescale
664 // the input values to the noise function.
665 const int NoiseEntries = 2048;
666 static int NoisePermutation[2*(NoiseEntries+1)];
667 static VECTOR NoiseGradients[2*(NoiseEntries+1)];
668
669 const DBL ROLLOVER = 10000000.023157213;
670
671 static void
InitSolidNoise(void)672 InitSolidNoise(void)
673 {
674 int i, j, k;
675 VECTOR v;
676 DBL s;
677
678 // Create an array of random gradient vectors uniformly on the unit sphere
679 int next_rand = 1;
680 for(i = 0; i < NoiseEntries; i++)
681 {
682 do
683 {
684 for(j = 0; j < 3; j++)
685 {
686 next_rand = next_rand * 1812433253L + 12345L;
687 v[j] = (DBL)((((int)(next_rand >> 16) & 0x7FFF) % (NoiseEntries << 1)) - NoiseEntries) / (DBL)NoiseEntries;
688 }
689 s = VSumSqr(v);
690 } while ((s > 1.0) || (s < 1.0e-5));
691 s = 1.0 / sqrt(s);
692 VScaleEq(v, s);
693
694 Assign_Vector(NoiseGradients[i], v);
695 }
696 // Create a pseudorandom permutation of [0..NoiseEntries]
697 for(i = 0; i < NoiseEntries; i++)
698 NoisePermutation[i] = i;
699 for(i = NoiseEntries; i > 0; i -= 2)
700 {
701 k = NoisePermutation[i];
702 next_rand = next_rand * 1812433253L + 12345L;
703 j = ((int)(next_rand >> 16) & 0x7FFF) % NoiseEntries;
704 NoisePermutation[i] = NoisePermutation[j];
705 NoisePermutation[j] = k;
706 }
707 // Duplicate the entries so that we don't need a modulus operation
708 // to get a value out.
709 for(i = 0; i < NoiseEntries + 2; i++)
710 {
711 NoisePermutation[NoiseEntries + i] = NoisePermutation[i];
712 Assign_Vector(NoiseGradients[NoiseEntries+i], NoiseGradients[i]);
713 }
714 }
715
716 // Hermite curve from 0 to 1. Makes a nice smooth transition of values.
717 static DBL inline
SCurve(DBL t)718 SCurve(DBL t)
719 {
720 return (t * t * (3.0 - 2.0 * t));
721 }
722
723
724 // Linear interpolation between a and b, as the value of t goes from 0 to 1.
725 static DBL inline
Lerp(DBL t,DBL a,DBL b)726 Lerp(DBL t, DBL a, DBL b)
727 {
728 return ((a) + (t) * ((b) - (a)));
729 }
730
731 // Linear interpolation between a and b, as the value of t goes from 0 to 1.
732 static void inline
VLerp(VECTOR v,DBL t,const VECTOR a,const VECTOR b)733 VLerp(VECTOR v, DBL t, const VECTOR a, const VECTOR b)
734 {
735 v[X] = Lerp(t, a[X], b[X]);
736 v[Y] = Lerp(t, a[Y], b[Y]);
737 v[Z] = Lerp(t, a[Z], b[Z]);
738 }
739
740 static void inline
SetupSolidNoise(const VECTOR P,int i,int & b0,int & b1,DBL & r0,DBL & r1)741 SetupSolidNoise(const VECTOR P, int i, int &b0, int &b1, DBL &r0, DBL &r1)
742 {
743 DBL t = P[i] + ROLLOVER;
744
745 int it = (int)floor(t);
746 b0 = it & (NoiseEntries - 1);
747 b1 = (b0 + 1) & (NoiseEntries - 1);
748 r0 = t - it;
749 r1 = r0 - 1.0;
750 }
751
752 static DBL inline
NoiseValueAt(const VECTOR q,DBL rx,DBL ry,DBL rz)753 NoiseValueAt(const VECTOR q, DBL rx, DBL ry, DBL rz)
754 {
755 return (rx * q[X] + ry * q[Y] + rz * q[Z]);
756 }
757
758 static DBL
SolidNoise(const VECTOR P)759 SolidNoise(const VECTOR P)
760 {
761 int bx0, bx1, by0, by1, bz0, bz1;
762 int b00, b10, b01, b11;
763 DBL rx0, rx1, ry0, ry1, rz0, rz1;
764 DBL sx, sy, sz, a, b, c, d, t, u, v;
765 int i, j;
766
767 SetupSolidNoise(P, 0, bx0, bx1, rx0, rx1);
768 SetupSolidNoise(P, 1, by0, by1, ry0, ry1);
769 SetupSolidNoise(P, 2, bz0, bz1, rz0, rz1);
770
771 i = NoisePermutation[bx0];
772 j = NoisePermutation[bx1];
773
774 b00 = NoisePermutation[i + by0];
775 b10 = NoisePermutation[j + by0];
776 b01 = NoisePermutation[i + by1];
777 b11 = NoisePermutation[j + by1];
778
779 sx = SCurve(rx0);
780 sy = SCurve(ry0);
781 sz = SCurve(rz0);
782
783 u = NoiseValueAt(NoiseGradients[b00 + bz0], rx0, ry0, rz0);
784 v = NoiseValueAt(NoiseGradients[b10 + bz0], rx1, ry0, rz0);
785 a = Lerp(sx, u, v);
786
787 u = NoiseValueAt(NoiseGradients[b01 + bz0], rx0, ry1, rz0);
788 v = NoiseValueAt(NoiseGradients[b11 + bz0], rx1, ry1, rz0);
789 b = Lerp(sx, u, v);
790
791 c = Lerp(sy, a, b);
792
793 u = NoiseValueAt(NoiseGradients[b00 + bz1], rx0, ry0, rz1);
794 v = NoiseValueAt(NoiseGradients[b10 + bz1], rx1, ry0, rz1);
795 a = Lerp(sx, u, v);
796
797 u = NoiseValueAt(NoiseGradients[b01 + bz1], rx0, ry1, rz1);
798 v = NoiseValueAt(NoiseGradients[b11 + bz1], rx1, ry1, rz1);
799 b = Lerp(sx, u, v);
800
801 d = Lerp(sy, a, b);
802
803 t = Lerp(sz, c, d);
804
805 return t;
806 }
807
808 static void
SolidDNoise(const VECTOR P,VECTOR D)809 SolidDNoise(const VECTOR P, VECTOR D)
810 {
811 int bx0, bx1, by0, by1, bz0, bz1;
812 int b00, b10, b01, b11;
813 DBL rx0, rx1, ry0, ry1, rz0, rz1;
814 DBL sx, sy, sz;
815 VECTOR a, b, c, d, u, v;
816 int i, j;
817
818 SetupSolidNoise(P, 0, bx0, bx1, rx0, rx1);
819 SetupSolidNoise(P, 1, by0, by1, ry0, ry1);
820 SetupSolidNoise(P, 2, bz0, bz1, rz0, rz1);
821
822 i = NoisePermutation[bx0];
823 j = NoisePermutation[bx1];
824
825 b00 = NoisePermutation[i + by0];
826 b10 = NoisePermutation[j + by0];
827 b01 = NoisePermutation[i + by1];
828 b11 = NoisePermutation[j + by1];
829
830 sx = SCurve(rx0);
831 sy = SCurve(ry0);
832 sz = SCurve(rz0);
833
834
835 Assign_Vector(u, NoiseGradients[b00 + bz0]);
836 Assign_Vector(v, NoiseGradients[b10 + bz0]);
837 VLerp(a, sx, u, v);
838
839 Assign_Vector(u, NoiseGradients[b01 + bz0]);
840 Assign_Vector(v, NoiseGradients[b11 + bz0]);
841 VLerp(b, sx, u, v);
842
843 VLerp(c, sy, a, b);
844
845 Assign_Vector(u, NoiseGradients[b00 + bz1]);
846 Assign_Vector(v, NoiseGradients[b10 + bz1]);
847 VLerp(a, sx, u, v);
848
849 Assign_Vector(u, NoiseGradients[b01 + bz1]);
850 Assign_Vector(v, NoiseGradients[b11 + bz1]);
851 VLerp(b, sx, u, v);
852
853 VLerp(d, sy, a, b);
854
855 VLerp(D, sz, c, d);
856 }
857
858
859 /*****************************************************************************
860 *
861 * FUNCTION
862 *
863 * Turbulence
864 *
865 * INPUT
866 *
867 * EPoint -- Point at which turb is evaluated.
868 * Turb -- Parameters for fbm calculations.
869 *
870 * OUTPUT
871 *
872 * RETURNS
873 *
874 * DBL result
875 *
876 * AUTHOR
877 *
878 * POV-Ray Team
879 *
880 * DESCRIPTION : Computes a Fractal Brownian Motion turbulence value
881 * using repeated calls to a Perlin Noise function.
882 *
883 * CHANGES
884 * ??? ???? : Updated with varible Octaves, Lambda, & Omega by [DMF]
885 *
886 ******************************************************************************/
887
Turbulence(const VECTOR EPoint,const TURB * Turb,int noise_generator)888 DBL Turbulence(const VECTOR EPoint, const TURB *Turb, int noise_generator)
889 {
890 int i;
891 DBL Lambda, Omega, l, o, value;
892 VECTOR temp;
893 int Octaves=Turb->Octaves;
894
895 if (noise_generator>1)
896 {
897 value = (2.0 * Noise(EPoint, noise_generator) - 0.5);
898 value = min(max(value,0.0),1.0);
899 } else {
900 value = Noise(EPoint, noise_generator);
901 }
902
903 l = Lambda = Turb->Lambda;
904 o = Omega = Turb->Omega;
905
906 for (i = 2; i <= Octaves; i++)
907 {
908 VScale(temp,EPoint,l);
909 if (noise_generator>1)
910 value += o * (2.0 * Noise(temp, noise_generator) - 0.5);
911 else
912 value += o * Noise(temp, noise_generator);
913 if (i < Octaves)
914 {
915 l *= Lambda;
916 o *= Omega;
917 }
918 }
919 return (value);
920 }
921
922
923
924 /*****************************************************************************
925 *
926 * FUNCTION
927 *
928 * DTurbulence
929 *
930 * INPUT
931 *
932 * EPoint -- Point at which turb is evaluated.
933 * Turb -- Parameters for fmb calculations.
934 *
935 * OUTPUT
936 *
937 * result -- Vector valued turbulence
938 *
939 * RETURNS
940 *
941 * AUTHOR
942 *
943 * POV-Ray Team
944 *
945 * DESCRIPTION : Computes a Fractal Brownian Motion turbulence value
946 * using repeated calls to a Perlin DNoise function.
947 *
948 * CHANGES
949 * ??? ???? : Updated with varible Octaves, Lambda, & Omega by [DMF]
950 *
951 ******************************************************************************/
952
953
DTurbulence(VECTOR result,const VECTOR EPoint,const TURB * Turb)954 void DTurbulence(VECTOR result, const VECTOR EPoint, const TURB *Turb)
955 {
956 DBL Omega, Lambda;
957 int i;
958 DBL l, o;
959 VECTOR value, temp;
960 int Octaves=Turb->Octaves;
961
962 result[X] = result[Y] = result[Z] = 0.0;
963 value[X] = value[Y] = value[Z] = 0.0;
964
965 DNoise(result, EPoint);
966
967 l = Lambda = Turb->Lambda;
968 o = Omega = Turb->Omega;
969
970 for (i = 2; i <= Octaves; i++)
971 {
972 VScale(temp,EPoint,l);
973
974 DNoise(value, temp);
975 result[X] += o * value[X];
976 result[Y] += o * value[Y];
977 result[Z] += o * value[Z];
978 if (i < Octaves)
979 {
980 l *= Lambda;
981 o *= Omega;
982 }
983 }
984 }
985
986
987
988 /*****************************************************************************
989 *
990 * FUNCTION
991 *
992 * cycloidal
993 *
994 * INPUT
995 *
996 * DBL value
997 *
998 * OUTPUT
999 *
1000 * RETURNS
1001 *
1002 * DBL result
1003 *
1004 * AUTHOR
1005 *
1006 * POV-Ray Team
1007 *
1008 * DESCRIPTION
1009 *
1010 * CHANGES
1011 *
1012 ******************************************************************************/
1013
cycloidal(DBL value)1014 DBL cycloidal(DBL value)
1015 {
1016 if (value >= 0.0)
1017 {
1018 return sin((DBL) (((value - floor(value)) * 50000.0)) / 50000.0 * TWO_M_PI) ;
1019 }
1020 else
1021 {
1022 return 0.0-sin((DBL) (((0.0 - (value + floor(0.0 - value))) * 50000.0)) / 50000.0 * TWO_M_PI);
1023 }
1024 }
1025
1026
1027
1028 /*****************************************************************************
1029 *
1030 * FUNCTION
1031 *
1032 * Triangle_Wave
1033 *
1034 * INPUT
1035 *
1036 * DBL value
1037 *
1038 * OUTPUT
1039 *
1040 * RETURNS
1041 *
1042 * DBL result
1043 *
1044 * AUTHOR
1045 *
1046 * POV-Ray Team
1047 *
1048 * DESCRIPTION
1049 *
1050 * CHANGES
1051 *
1052 ******************************************************************************/
1053
Triangle_Wave(DBL value)1054 DBL Triangle_Wave(DBL value)
1055 {
1056 DBL offset;
1057
1058 if (value >= 0.0)
1059 {
1060 offset = value - floor(value);
1061 }
1062 else
1063 {
1064 offset = value + 1.0 + floor(fabs(value));
1065 }
1066 if (offset >= 0.5)
1067 {
1068 return (2.0 * (1.0 - offset));
1069 }
1070 else
1071 {
1072 return (2.0 * offset);
1073 }
1074 }
1075
1076
1077
1078 /*****************************************************************************
1079 *
1080 * FUNCTION
1081 *
1082 * INPUT
1083 *
1084 * OUTPUT
1085 *
1086 * RETURNS
1087 *
1088 * AUTHOR
1089 *
1090 * POV-Ray Team
1091 *
1092 * DESCRIPTION
1093 *
1094 * CHANGES
1095 *
1096 ******************************************************************************/
1097
Transform_Textures(TEXTURE * Textures,const TRANSFORM * Trans)1098 void Transform_Textures(TEXTURE *Textures, const TRANSFORM *Trans)
1099 {
1100 TEXTURE *Layer;
1101
1102 for (Layer = Textures; Layer != NULL; Layer = reinterpret_cast<TEXTURE *>(Layer->Next))
1103 {
1104 if (Layer->Type == PLAIN_PATTERN)
1105 {
1106 Transform_Tpattern(reinterpret_cast<TPATTERN *>(Layer->Pigment), Trans);
1107 Transform_Tpattern(reinterpret_cast<TPATTERN *>(Layer->Tnormal), Trans);
1108 }
1109 else
1110 {
1111 Transform_Tpattern(reinterpret_cast<TPATTERN *>(Layer), Trans);
1112 }
1113 }
1114 }
1115
1116
1117
1118 /*****************************************************************************
1119 *
1120 * FUNCTION
1121 *
1122 * INPUT
1123 *
1124 * OUTPUT
1125 *
1126 * RETURNS
1127 *
1128 * AUTHOR
1129 *
1130 * POV-Ray Team
1131 *
1132 * DESCRIPTION
1133 *
1134 * CHANGES
1135 * 6/27/98 MBP Added initializers for reflection blur
1136 * 8/27/98 MBP Added initializers for angle-based reflectivity
1137 *
1138 ******************************************************************************/
1139
Create_Finish()1140 FINISH *Create_Finish()
1141 {
1142 FINISH *New;
1143
1144 New = reinterpret_cast<FINISH *>(POV_MALLOC(sizeof (FINISH), "finish"));
1145
1146 New->Ambient.set(0.1);
1147 New->Emission.clear();
1148 New->Reflection_Max.clear();
1149 New->Reflection_Min.clear();
1150
1151 New->Reflection_Type = 0;
1152 New->Reflection_Falloff = 1; /* Added by MBP 8/27/98 */
1153 New->Diffuse = 0.6;
1154 New->DiffuseBack = 0.0;
1155 New->RawDiffuse = New->Diffuse;
1156 New->RawDiffuseBack = New->DiffuseBack;
1157 New->Brilliance = 1.0;
1158 New->Phong = 0.0;
1159 New->Phong_Size = 40.0;
1160 New->Specular = 0.0;
1161 New->Roughness = 1.0 / 0.05;
1162
1163 New->Crand = 0.0;
1164
1165 New->Metallic = 0.0;
1166
1167 New->Irid = 0.0;
1168 New->Irid_Film_Thickness = 0.0;
1169 New->Irid_Turb = 0.0;
1170 New->Temp_Caustics = -1.0;
1171 New->Temp_IOR = -1.0;
1172 New->Temp_Dispersion = 1.0;
1173 New->Temp_Refract = 1.0;
1174 New->Reflect_Exp = 1.0;
1175 New->Reflect_Metallic = 0.0;
1176 /* Added Dec 19 1999 by NK */
1177 New->Conserve_Energy = false;
1178
1179 New->UseSubsurface = false;
1180 New->SubsurfaceTranslucency.clear();
1181 New->SubsurfaceAnisotropy.clear();
1182
1183 return(New);
1184 }
1185
1186
1187
1188 /*****************************************************************************
1189 *
1190 * FUNCTION
1191 *
1192 * INPUT
1193 *
1194 * OUTPUT
1195 *
1196 * RETURNS
1197 *
1198 * AUTHOR
1199 *
1200 * POV-Ray Team
1201 *
1202 * DESCRIPTION
1203 *
1204 * CHANGES
1205 *
1206 ******************************************************************************/
1207
Copy_Finish(const FINISH * Old)1208 FINISH *Copy_Finish(const FINISH *Old)
1209 {
1210 FINISH *New;
1211
1212 if (Old != NULL)
1213 {
1214 New = Create_Finish();
1215 *New = *Old;
1216 }
1217 else
1218 New = NULL;
1219 return (New);
1220 }
1221
1222
1223
1224 /*****************************************************************************
1225 *
1226 * FUNCTION
1227 *
1228 * INPUT
1229 *
1230 * OUTPUT
1231 *
1232 * RETURNS
1233 *
1234 * AUTHOR
1235 *
1236 * POV-Ray Team
1237 *
1238 * DESCRIPTION
1239 *
1240 * CHANGES
1241 *
1242 ******************************************************************************/
1243
Create_Texture()1244 TEXTURE *Create_Texture()
1245 {
1246 TEXTURE *New;
1247
1248 New = reinterpret_cast<TEXTURE *>(POV_MALLOC(sizeof (TEXTURE), "texture"));
1249
1250 Init_TPat_Fields(reinterpret_cast<TPATTERN *>(New));
1251
1252 New->References = 1;
1253
1254 New->Type = PLAIN_PATTERN;
1255 New->Flags |= NO_FLAGS; // [CLi] Already initialized by Init_TPat_Fields
1256
1257 New->Pigment = NULL;
1258 New->Tnormal = NULL;
1259 New->Finish = NULL;
1260
1261 New->Next = NULL;
1262 New->Next_Material = NULL;
1263
1264 New->Materials = NULL;
1265 New->Num_Of_Mats = 0;
1266
1267 return (New);
1268 }
1269
1270
1271 /*****************************************************************************
1272 *
1273 * FUNCTION
1274 *
1275 * INPUT
1276 *
1277 * OUTPUT
1278 *
1279 * RETURNS
1280 *
1281 * AUTHOR
1282 *
1283 * POV-Ray Team
1284 *
1285 * DESCRIPTION
1286 *
1287 * CHANGES
1288 *
1289 ******************************************************************************/
1290
Copy_Texture_Pointer(TEXTURE * Texture)1291 TEXTURE *Copy_Texture_Pointer(TEXTURE *Texture)
1292 {
1293 if (Texture != NULL)
1294 {
1295 Texture->References++;
1296 }
1297
1298 return(Texture);
1299 }
1300
1301
1302
1303
1304 /*****************************************************************************
1305 *
1306 * FUNCTION
1307 *
1308 * INPUT
1309 *
1310 * OUTPUT
1311 *
1312 * RETURNS
1313 *
1314 * AUTHOR
1315 *
1316 * POV-Ray Team
1317 *
1318 * DESCRIPTION
1319 *
1320 * CHANGES
1321 *
1322 ******************************************************************************/
1323
Copy_Textures(const TEXTURE * Textures)1324 TEXTURE *Copy_Textures(const TEXTURE *Textures)
1325 {
1326 TEXTURE *New, *First, *Previous;
1327 const TEXTURE *Layer;
1328
1329 Previous = First = NULL;
1330
1331 for (Layer = Textures; Layer != NULL; Layer = reinterpret_cast<const TEXTURE *>(Layer->Next))
1332 {
1333 New = Create_Texture();
1334 Copy_TPat_Fields (reinterpret_cast<TPATTERN *>(New), reinterpret_cast<const TPATTERN *>(Layer));
1335
1336 /* Mesh copies a texture pointer that already has multiple
1337 references. We just want a clean copy, not a copy
1338 that's multply referenced.
1339 */
1340
1341 New->References = 1;
1342
1343 switch (Layer->Type)
1344 {
1345 case PLAIN_PATTERN:
1346 New->Pigment = Copy_Pigment(Layer->Pigment);
1347 New->Tnormal = Copy_Tnormal(Layer->Tnormal);
1348 New->Finish = Copy_Finish(Layer->Finish);
1349
1350 break;
1351
1352 case BITMAP_PATTERN:
1353
1354 New->Materials = Copy_Materials(Layer->Materials);
1355 New->Num_Of_Mats = Layer->Num_Of_Mats;
1356
1357 // Not needed. Copied by Copy_TPat_Fields:
1358 // New->Vals.Image = Copy_Image(Layer->Vals.Image);
1359
1360 break;
1361 }
1362
1363 if (First == NULL)
1364 {
1365 First = New;
1366 }
1367
1368 if (Previous != NULL)
1369 {
1370 Previous->Next = reinterpret_cast<TPATTERN *>(New);
1371 }
1372
1373 Previous = New;
1374 }
1375
1376 return (First);
1377 }
1378
1379
1380
1381 /*****************************************************************************
1382 *
1383 * FUNCTION
1384 *
1385 * INPUT
1386 *
1387 * OUTPUT
1388 *
1389 * RETURNS
1390 *
1391 * AUTHOR
1392 *
1393 * POV-Ray Team
1394 *
1395 * DESCRIPTION
1396 *
1397 * CHANGES
1398 *
1399 ******************************************************************************/
1400
Copy_Materials(TEXTURE * Old)1401 static TEXTURE *Copy_Materials(TEXTURE *Old)
1402 {
1403 TEXTURE *New, *First, *Previous, *Material;
1404
1405 Previous = First = NULL;
1406
1407 for (Material = Old; Material != NULL; Material = Material->Next_Material)
1408 {
1409 New = Copy_Textures(Material);
1410
1411 if (First == NULL)
1412 {
1413 First = New;
1414 }
1415
1416 if (Previous != NULL)
1417 {
1418 Previous->Next_Material = New;
1419 }
1420
1421 Previous = New;
1422 }
1423
1424 return (First);
1425 }
1426
1427
1428
1429 /*****************************************************************************
1430 *
1431 * FUNCTION
1432 *
1433 * INPUT
1434 *
1435 * OUTPUT
1436 *
1437 * RETURNS
1438 *
1439 * AUTHOR
1440 *
1441 * POV-Ray Team
1442 *
1443 * DESCRIPTION
1444 *
1445 * CHANGES
1446 *
1447 ******************************************************************************/
1448
Destroy_Textures(TEXTURE * Textures)1449 void Destroy_Textures(TEXTURE *Textures)
1450 {
1451 TEXTURE *Layer = Textures;
1452 TEXTURE *Mats;
1453 TEXTURE *Temp;
1454
1455 if ((Textures == NULL) || (--(Textures->References) > 0))
1456 {
1457 return;
1458 }
1459
1460 while (Layer != NULL)
1461 {
1462 Mats = Layer->Next_Material;
1463
1464 while (Mats != NULL)
1465 {
1466 Temp = Mats->Next_Material;
1467 Mats->Next_Material = NULL;
1468 Destroy_Textures(Mats);
1469 Mats = Temp;
1470 }
1471
1472 Destroy_TPat_Fields(reinterpret_cast<TPATTERN *>(Layer));
1473
1474 switch (Layer->Type)
1475 {
1476 case PLAIN_PATTERN:
1477
1478 Destroy_Pigment(Layer->Pigment);
1479 Destroy_Tnormal(Layer->Tnormal);
1480 Destroy_Finish(Layer->Finish);
1481
1482 break;
1483
1484
1485 case BITMAP_PATTERN:
1486
1487 Destroy_Textures(Layer->Materials);
1488 /*taken care of by Destroy_TPat_Fields*/
1489 /*Destroy_Image(Layer->Vals.Image);*/
1490
1491 break;
1492 }
1493
1494 Temp = reinterpret_cast<TEXTURE *>(Layer->Next);
1495 POV_FREE(Layer);
1496 Layer = Temp;
1497 }
1498 }
1499
1500
1501
1502 /*****************************************************************************
1503 *
1504 * FUNCTION
1505 *
1506 * INPUT
1507 *
1508 * OUTPUT
1509 *
1510 * RETURNS
1511 *
1512 * AUTHOR
1513 *
1514 * POV-Ray Team
1515 *
1516 * DESCRIPTION
1517 *
1518 * CHANGES
1519 *
1520 ******************************************************************************/
1521
Post_Textures(TEXTURE * Textures)1522 void Post_Textures(TEXTURE *Textures)
1523 {
1524 TEXTURE *Layer, *Material;
1525 int i;
1526 BLEND_MAP *Map;
1527
1528 if (Textures == NULL)
1529 {
1530 return;
1531 }
1532
1533 for (Layer = Textures; Layer != NULL; Layer = reinterpret_cast<TEXTURE *>(Layer->Next))
1534 {
1535 if (!((Layer->Flags) & POST_DONE))
1536 {
1537 switch (Layer->Type)
1538 {
1539 case PLAIN_PATTERN:
1540
1541 if(Layer->Tnormal)
1542 {
1543 Layer->Tnormal->Flags |=
1544 (Layer->Flags & DONT_SCALE_BUMPS_FLAG);
1545 }
1546 Post_Pigment(Layer->Pigment);
1547 Post_Tnormal(Layer->Tnormal);
1548
1549 break;
1550
1551 case BITMAP_PATTERN:
1552
1553 for (Material = Layer->Materials; Material != NULL; Material = Material->Next_Material)
1554
1555 Post_Textures(Material);
1556
1557 break;
1558 }
1559
1560 if ((Map=Layer->Blend_Map) != NULL)
1561 {
1562 for (i = 0; i < Map->Number_Of_Entries; i++)
1563 {
1564 Map->Blend_Map_Entries[i].Vals.Texture->Flags |=
1565 (Layer->Flags & DONT_SCALE_BUMPS_FLAG);
1566 Post_Textures(Map->Blend_Map_Entries[i].Vals.Texture);
1567 }
1568 }
1569 else
1570 {
1571 if (Layer->Type == AVERAGE_PATTERN)
1572 {
1573 throw POV_EXCEPTION_STRING("No texture map in averaged texture.");
1574 }
1575 }
1576 }
1577 }
1578 }
1579
1580
1581
1582 /*****************************************************************************
1583 *
1584 * FUNCTION
1585 *
1586 * Test_Opacity
1587 *
1588 * INPUT
1589 *
1590 * Object - Pointer to object
1591 *
1592 * OUTPUT
1593 *
1594 * RETURNS
1595 *
1596 * int - true, if opaque
1597 *
1598 * AUTHOR
1599 *
1600 * Dieter Bayer
1601 *
1602 * DESCRIPTION
1603 *
1604 * Test wether an object is opaque or not, i.e. wether the texture contains
1605 * a non-zero filter or alpha channel.
1606 *
1607 * CHANGES
1608 *
1609 * Aug 1994 : Creation.
1610 *
1611 * Oct 1994 : Added code to check for opaque image maps. [DB]
1612 *
1613 * Jun 1995 : Added code to check for alpha channel image maps. [DB]
1614 *
1615 ******************************************************************************/
1616
Test_Opacity(const TEXTURE * Texture)1617 int Test_Opacity(const TEXTURE *Texture)
1618 {
1619 int Opaque, Help;
1620 const TEXTURE *Layer, *Material;
1621
1622 if (Texture == NULL)
1623 {
1624 return(false);
1625 }
1626
1627 /* We assume that the object is not opaque. */
1628
1629 Opaque = false;
1630
1631 /* Test all layers. If at least one layer is opaque the object is opaque. */
1632
1633 for (Layer = Texture; Layer != NULL; Layer = reinterpret_cast<const TEXTURE *>(Layer->Next))
1634 {
1635 switch (Layer->Type)
1636 {
1637 case PLAIN_PATTERN:
1638
1639 /* Test image map for opacity. */
1640
1641 if (!(Layer->Pigment->Flags & HAS_FILTER))
1642 {
1643 Opaque = true;
1644 }
1645
1646 break;
1647
1648 case BITMAP_PATTERN:
1649
1650 /* Layer is not opaque if the image map is used just once. */
1651
1652 if (Layer->Vals.image != NULL)
1653 {
1654 if (Layer->Vals.image->Once_Flag)
1655 {
1656 break;
1657 }
1658 }
1659
1660 /* Layer is opaque if all materials are opaque. */
1661
1662 Help = true;
1663
1664 for (Material = Layer->Materials; Material != NULL; Material = Material->Next_Material)
1665 {
1666 if (!Test_Opacity(Material))
1667 {
1668 /* Material is not opaque --> layer is not opaque. */
1669
1670 Help = false;
1671
1672 break;
1673 }
1674 }
1675
1676 if (Help)
1677 {
1678 Opaque = true;
1679 }
1680
1681 break;
1682 }
1683 }
1684
1685 return(Opaque);
1686 }
1687
1688
1689 #if defined(USE_AVX_FMA4_FOR_NOISE)
1690
1691 /********************************************************************************************/
1692 /* AMD Specific optimizations: Its found that more than 50% of the time is spent in */
1693 /* Noise and DNoise. These functions have been optimized using AVX and FMA4 instructions */
1694 /* */
1695 /********************************************************************************************/
1696 DBL (*Noise) (const VECTOR EPoint, int noise_generator);
1697 void (*DNoise) (VECTOR result, const VECTOR EPoint);
1698
1699 /*****************************************************************************
1700 *
1701 * FUNCTION
1702 *
1703 * Initialise_NoiseDispatch
1704 *
1705 * INPUT
1706 *
1707 * None
1708 *
1709 * OUTPUT
1710 * Initialises the Noise and DNoise Function pointers to the right functions
1711 *
1712 *
1713 * RETURNS
1714 *
1715 * None
1716 *
1717 * AUTHOR
1718 *
1719 * AMD
1720 *
1721 * DESCRIPTION
1722 *
1723 * CHANGES
1724 *
1725 *
1726 ******************************************************************************/
1727
Initialise_NoiseDispatch()1728 void Initialise_NoiseDispatch()
1729 {
1730 static bool cpu_detected = false;
1731
1732 if(!cpu_detected)
1733 {
1734 if(CPU_FMA4_DETECT())
1735 {
1736 Noise = AVX_FMA4_Noise;
1737 DNoise = AVX_FMA4_DNoise;
1738 }
1739 else
1740 {
1741 Noise = OriNoise;
1742 DNoise = OriDNoise;
1743 }
1744
1745 cpu_detected = true;
1746 }
1747 }
1748
1749
1750
1751 /*****************************************************************************
1752 *
1753 * FUNCTION
1754 *
1755 * AVX_FMA4_Noise
1756 *
1757 * INPUT
1758 *
1759 * EPoint -- 3-D point at which noise is evaluated
1760 *
1761 * OUTPUT
1762 *
1763 * RETURNS
1764 *
1765 * DBL noise value
1766 *
1767 * AUTHOR
1768 *
1769 * Robert Skinner based on Ken Perlin
1770 *
1771 * DESCRIPTION
1772 *
1773 * CHANGES
1774 * Modified by AAC to ensure uniformly distributed clamped values
1775 * between 0 and 1.0...
1776 *
1777 * Feb 8, 2001: modified function based on MegaPov 0.7 to remove
1778 * bugs that showed up when noise was translated.
1779 *
1780 * Changes Made by AMD
1781 * July 28, 2011: Re-wrote OriNoise function using AVX and FMA4 intrinsic
1782 *
1783 ******************************************************************************/
1784
AVX_FMA4_Noise(const VECTOR EPoint,int noise_generator)1785 DBL AVX_FMA4_Noise(const VECTOR EPoint, int noise_generator)
1786 {
1787 DBL x, y, z;
1788 DBL *mp;
1789 int tmp;
1790 int ix, iy, iz;
1791 int ixiy_hash, ixjy_hash, jxiy_hash, jxjy_hash;
1792 DBL x_ix,y_iy, z_iz;
1793
1794 DBL sum = 0.0;
1795
1796 // TODO FIXME - global statistics reference
1797 // Stats[Calls_To_Noise]++;
1798
1799 if (noise_generator==3)
1800 {
1801 // The 1.59 and 0.985 are to correct for some biasing problems with
1802 // the random # generator used to create the noise tables. Final
1803 // range of values is about 5.0e-4 below 0.0 and above 1.0. Mean
1804 // value is 0.49 (ideally it would be 0.5).
1805 sum = 0.5 * (1.59 * SolidNoise(EPoint) + 0.985);
1806
1807 // Clamp final value to 0-1 range
1808 if (sum < 0.0) sum = 0.0;
1809 if (sum > 1.0) sum = 1.0;
1810
1811 return sum;
1812 }
1813
1814 x = EPoint[X];
1815 y = EPoint[Y];
1816 z = EPoint[Z];
1817
1818 /* its equivalent integer lattice point. */
1819 /* ix = (int)x; iy = (int)y; iz = (long)z; */
1820 /* JB fix for the range problem */
1821 tmp = (x>=0)?(int)x:(int)(x-(1-EPSILON));
1822 ix = (int)((tmp-MINX)&0xFFF);
1823 x_ix = x-tmp;
1824
1825 tmp = (y>=0)?(int)y:(int)(y-(1-EPSILON));
1826 iy = (int)((tmp-MINY)&0xFFF);
1827 y_iy = y-tmp;
1828
1829 tmp = (z>=0)?(int)z:(int)(z-(1-EPSILON));
1830 iz = (int)((tmp-MINZ)&0xFFF);
1831 z_iz = z-tmp;
1832
1833
1834 ixiy_hash = Hash2d(ix, iy);
1835 jxiy_hash = Hash2d(ix + 1, iy);
1836 ixjy_hash = Hash2d(ix, iy + 1);
1837 jxjy_hash = Hash2d(ix + 1, iy + 1);
1838
1839 __m128d three = {3.0,3.0};
1840 __m128d two = {2.0,2.0};
1841 __m128d one = {1.0,1.0};
1842
1843 __m128d ix_mm = _mm_set1_pd(x_ix);
1844 __m128d iy_mm = _mm_set1_pd(y_iy);
1845 __m128d iz_mm = _mm_set1_pd(z_iz);
1846
1847 __m128d jx_mm = _mm_sub_pd(ix_mm,one);
1848 __m128d jy_mm = _mm_sub_pd(iy_mm,one);
1849 __m128d jz_mm = _mm_sub_pd(iz_mm,one);
1850
1851
1852 __m128d mm_sx = _mm_mul_pd(_mm_mul_pd(ix_mm,ix_mm), _mm_nmacc_pd(two,ix_mm,three));
1853 __m128d mm_sy = _mm_mul_pd(_mm_mul_sd(iy_mm,iy_mm),_mm_nmacc_sd(two,iy_mm,three));
1854 __m128d mm_sz = _mm_mul_pd(_mm_mul_pd(iz_mm,iz_mm),_mm_nmacc_pd(two,iz_mm,three));
1855
1856 mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz)];
1857 DBL *mp2 = &RTable[Hash1dRTableIndex(ixjy_hash, iz)];
1858
1859 __m128d mm_tx = _mm_sub_pd(one,mm_sx);
1860 __m128d mm_ty = _mm_sub_sd(one,mm_sy);
1861 __m128d mm_tz = _mm_sub_pd(one,mm_sz);
1862 __m128d temp_mm = _mm_unpacklo_pd(mm_ty,mm_sy);
1863 __m128d mm_txty_txsy = _mm_mul_pd(mm_tx,temp_mm);
1864 __m128d mm_sxty_sxsy = _mm_mul_pd(mm_sx,temp_mm);
1865
1866 __m128d mp_t1 = _mm_loadu_pd(mp + 1);
1867 __m128d mp_t2 = _mm_loadu_pd(mp2 + 1);
1868
1869
1870 __m128d mp4_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 4),_mm_load_sd(mp2 + 4)); // 44
1871 __m128d mp6_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 6),_mm_load_sd(mp2 + 6)); // 66
1872
1873 __m128d mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2); // 11
1874 __m128d mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2); // 22
1875
1876 mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz + 1)];
1877 mp2 = &RTable[Hash1dRTableIndex(ixjy_hash, iz + 1)];
1878
1879 __m128d s_mm = _mm_mul_pd(mm_txty_txsy,mm_tz);
1880 __m128d y_mm = _mm_unpacklo_pd(iy_mm,jy_mm);
1881
1882 __m128d int_sumf = _mm_macc_pd(ix_mm,mp2_mm,mp1_mm);
1883 int_sumf = _mm_macc_pd(y_mm,mp4_mm,int_sumf);
1884 int_sumf = _mm_macc_pd(iz_mm,mp6_mm,int_sumf);
1885 int_sumf = _mm_mul_pd(s_mm,int_sumf);
1886 //-2---------------------------------------------------------------
1887
1888 mp_t1 = _mm_loadu_pd(mp + 1);
1889 mp_t2 = _mm_loadu_pd(mp2 + 1);
1890
1891 mp4_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 4),_mm_load_sd(mp2 + 4)); // 44
1892 mp6_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 6),_mm_load_sd(mp2 + 6)); // 66
1893
1894 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2); // 11
1895 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2); // 22
1896
1897 mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz)];
1898 mp2 = &RTable[Hash1dRTableIndex(jxjy_hash, iz)];
1899
1900 s_mm = _mm_mul_pd(mm_txty_txsy,mm_sz);
1901
1902 __m128d int_sum1 = _mm_macc_pd(ix_mm,mp2_mm,mp1_mm);
1903 int_sum1 = _mm_macc_pd(y_mm,mp4_mm,int_sum1);
1904 int_sum1 = _mm_macc_pd(jz_mm,mp6_mm,int_sum1);
1905 int_sum1 = _mm_macc_pd(s_mm,int_sum1,int_sumf);
1906 //-3---------------------------------------------------------------
1907
1908 mp_t1 = _mm_loadu_pd(mp + 1);
1909 mp_t2 = _mm_loadu_pd(mp2 + 1);
1910
1911 mp4_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 4),_mm_load_sd(mp2 + 4)); // 44
1912 mp6_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 6),_mm_load_sd(mp2 + 6)); // 66
1913
1914 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2); // 11
1915 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2); // 22
1916
1917 mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz + 1)];
1918 mp2 = &RTable[Hash1dRTableIndex(jxjy_hash, iz + 1)];
1919
1920 s_mm = _mm_mul_pd(mm_sxty_sxsy,mm_tz);
1921
1922 int_sumf = _mm_macc_pd(jx_mm,mp2_mm,mp1_mm);
1923 int_sumf = _mm_macc_pd(y_mm,mp4_mm,int_sumf);
1924 int_sumf = _mm_macc_pd(iz_mm,mp6_mm,int_sumf);
1925 int_sumf = _mm_macc_pd(s_mm,int_sumf,int_sum1);
1926 //-4---------------------------------------------------------------
1927 mp_t1 = _mm_loadu_pd(mp + 1);
1928 mp_t2 = _mm_loadu_pd(mp2 + 1);
1929
1930 mp4_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 4),_mm_load_sd(mp2 + 4)); // 44
1931 mp6_mm = _mm_unpacklo_pd(_mm_load_sd(mp + 6),_mm_load_sd(mp2 + 6)); // 66
1932
1933 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2); // 11
1934 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2); // 22
1935
1936
1937 s_mm = _mm_mul_pd(mm_sxty_sxsy,mm_sz);
1938
1939 int_sum1 = _mm_macc_pd(jx_mm,mp2_mm,mp1_mm);
1940 int_sum1 = _mm_macc_pd(y_mm,mp4_mm,int_sum1);
1941 int_sum1 = _mm_macc_pd(jz_mm,mp6_mm,int_sum1);
1942 int_sum1 = _mm_macc_pd(s_mm,int_sum1,int_sumf);
1943 int_sum1 = _mm_add_sd(_mm_unpackhi_pd(int_sum1,int_sum1),int_sum1);
1944 _mm_store_sd(&sum,int_sum1);
1945
1946 if(noise_generator==2)
1947 {
1948 /* details of range here:
1949 Min, max: -1.05242, 0.988997
1950 Mean: -0.0191481, Median: -0.535493, Std Dev: 0.256828
1951
1952 We want to change it to as close to [0,1] as possible.
1953 */
1954 sum += 1.05242;
1955 sum *= 0.48985582;
1956 /*sum *= 0.5;
1957 sum += 0.5;*/
1958
1959 if (sum < 0.0)
1960 sum = 0.0;
1961 if (sum > 1.0)
1962 sum = 1.0;
1963 }
1964 else
1965 {
1966 sum = sum + 0.5; /* range at this point -0.5 - 0.5... */
1967
1968 if (sum < 0.0)
1969 sum = 0.0;
1970 if (sum > 1.0)
1971 sum = 1.0;
1972 }
1973 return (sum);
1974 }
1975
1976
1977
1978 /*****************************************************************************
1979 *
1980 * FUNCTION
1981 *
1982 * AVX_FMA4_DNoise
1983 *
1984 * INPUT
1985 *
1986 * EPoint -- 3-D point at which noise is evaluated
1987 *
1988 * OUTPUT
1989 *
1990 * VECTOR result
1991 *
1992 * RETURNS
1993 *
1994 * AUTHOR
1995 *
1996 * Robert Skinner based on Ken Perlin
1997 *
1998 * DESCRIPTION
1999 * Vector-valued version of "Noise"
2000 *
2001 * CHANGES
2002 * Modified by AAC to ensure uniformly distributed clamped values
2003 * between 0 and 1.0...
2004 *
2005 * Feb 8, 2001: modified function based on MegaPov 0.7 to remove
2006 * bugs that showed up when noise was translated.
2007 *
2008 * Changes Made by AMD
2009 * July 28, 2011:Re-wrote OriDNoise function using AVX and FMA4 intrinsic
2010 *
2011 ******************************************************************************/
2012
AVX_FMA4_DNoise(VECTOR result,const VECTOR EPoint)2013 void AVX_FMA4_DNoise(VECTOR result, const VECTOR EPoint)
2014 {
2015 DBL x, y, z;
2016 int ix, iy, iz;
2017 DBL *mp;
2018 int tmp;
2019 int ixiy_hash, ixjy_hash, jxiy_hash, jxjy_hash;
2020 DBL x_ix,y_iy,z_iz;
2021
2022 // TODO FIXME - global statistics reference
2023 // Stats[Calls_To_DNoise]++;
2024
2025 x = EPoint[X];
2026 y = EPoint[Y];
2027 z = EPoint[Z];
2028
2029 /* its equivalent integer lattice point. */
2030 /*ix = (int)x; iy = (int)y; iz = (int)z;
2031 x_ix = x - ix; y_iy = y - iy; z_iz = z - iz;*/
2032 /* JB fix for the range problem */
2033 tmp = (x>=0)?(int)x:(int)(x-(1-EPSILON));
2034 ix = (int)((tmp-MINX)&0xFFF);
2035 x_ix = x-tmp;
2036
2037 tmp = (y>=0)?(int)y:(int)(y-(1-EPSILON));
2038 iy = (int)((tmp-MINY)&0xFFF);
2039 y_iy = y-tmp;
2040
2041 tmp = (z>=0)?(int)z:(int)(z-(1-EPSILON));
2042 iz = (int)((tmp-MINZ)&0xFFF);
2043 z_iz = z-tmp;
2044
2045 ixiy_hash = Hash2d(ix, iy);
2046 jxiy_hash = Hash2d(ix + 1, iy);
2047 ixjy_hash = Hash2d(ix, iy + 1);
2048 jxjy_hash = Hash2d(ix + 1, iy + 1);
2049
2050 __m128d three = {3.0,3.0};
2051 __m128d two = {2.0,2.0};
2052 __m128d one = {1.0,1.0};
2053
2054 __m128d ix_mm = _mm_set1_pd(x_ix);
2055 __m128d iy_mm = _mm_set1_pd(y_iy);
2056 __m128d iz_mm = _mm_set1_pd(z_iz);
2057
2058 __m128d jx_mm = _mm_sub_pd(ix_mm,one);
2059 __m128d jy_mm = _mm_sub_pd(iy_mm,one);
2060 __m128d jz_mm = _mm_sub_pd(iz_mm,one);
2061
2062 __m128d mm_sx = _mm_mul_pd(_mm_mul_pd(ix_mm,ix_mm), _mm_nmacc_pd(two,ix_mm,three));
2063 __m128d mm_sy = _mm_mul_pd(_mm_mul_pd(iy_mm,iy_mm),_mm_nmacc_pd(two,iy_mm,three));
2064 __m128d mm_sz = _mm_mul_pd(_mm_mul_pd(iz_mm,iz_mm),_mm_nmacc_pd(two,iz_mm,three));
2065
2066 __m128d mm_tx = _mm_sub_pd(one,mm_sx);
2067 __m128d mm_ty = _mm_sub_pd(one,mm_sy);
2068 __m128d mm_tz = _mm_sub_pd(one,mm_sz);
2069
2070 __m128d mm_txty = _mm_mul_pd(mm_tx,mm_ty);
2071 __m128d mm_s = _mm_mul_pd(mm_txty,mm_tz);
2072
2073 mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz)];
2074 DBL *mp2 = mp+8;
2075
2076 __m128d mp_t1 = _mm_loadu_pd(mp + 1);
2077 __m128d mp_t2 = _mm_loadu_pd(mp2 + 1);
2078 __m128d mp_t3 = _mm_load_sd(mp + 4);
2079 __m128d mp_t4 = _mm_load_sd(mp2 + 4);
2080 __m128d mp_t5 = _mm_load_sd(mp + 6);
2081 __m128d mp_t6 = _mm_load_sd(mp2 + 6);
2082
2083 __m128d mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2); // 11
2084 __m128d mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2); // 22
2085
2086 __m128d mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4); // 44
2087 __m128d mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6); // 66
2088
2089 __m128d sum_X_Y = _mm_macc_pd(ix_mm,mp2_mm,mp1_mm);
2090 sum_X_Y = _mm_macc_pd(iy_mm,mp4_mm,sum_X_Y);
2091 sum_X_Y = _mm_macc_pd(iz_mm,mp6_mm,sum_X_Y);
2092 sum_X_Y = _mm_mul_pd(mm_s,sum_X_Y);
2093
2094 mp2 = mp2+8;
2095 mp1_mm = _mm_load_sd(mp2 + 1);
2096 mp2_mm = _mm_load_sd(mp2 + 2);
2097 mp4_mm = _mm_load_sd(mp2 + 4);
2098 mp6_mm = _mm_load_sd(mp2 + 6);
2099
2100
2101 __m128d sum__Z = _mm_macc_sd(ix_mm,mp2_mm,mp1_mm);
2102 sum__Z = _mm_macc_sd(iy_mm,mp4_mm,sum__Z);
2103 sum__Z = _mm_macc_sd(iz_mm,mp6_mm,sum__Z);
2104 sum__Z = _mm_mul_sd(mm_s,sum__Z);
2105
2106 //2--------------------------------------------------
2107
2108 mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz)];
2109 mp2 = mp+8;
2110
2111 mp_t1 = _mm_loadu_pd(mp + 1);
2112 mp_t2 = _mm_loadu_pd(mp2 + 1);
2113 mp_t3 = _mm_load_sd(mp + 4);
2114 mp_t4 = _mm_load_sd(mp2 + 4);
2115 mp_t5 = _mm_load_sd(mp + 6);
2116 mp_t6 = _mm_load_sd(mp2 + 6);
2117
2118 __m128d mm_sxty = _mm_mul_pd(mm_sx,mm_ty);
2119 mm_s = _mm_mul_pd(mm_sxty,mm_tz);
2120
2121
2122 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2); // 11
2123 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2); // 22
2124
2125 mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4); // 44
2126 mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6); // 66
2127
2128 mp2 = mp2+8;
2129
2130 __m128d t_sum_X_Y = _mm_macc_pd(jx_mm,mp2_mm,mp1_mm);
2131 t_sum_X_Y = _mm_macc_pd(iy_mm,mp4_mm,t_sum_X_Y);
2132 t_sum_X_Y = _mm_macc_pd(iz_mm,mp6_mm,t_sum_X_Y);
2133 sum_X_Y = _mm_macc_pd(mm_s,t_sum_X_Y,sum_X_Y);
2134
2135
2136 mp1_mm = _mm_load_sd(mp2 + 1);
2137 mp2_mm = _mm_load_sd(mp2 + 2);
2138 mp4_mm = _mm_load_sd(mp2 + 4);
2139 mp6_mm = _mm_load_sd(mp2 + 6);
2140
2141 __m128d t_sum__Z = _mm_macc_sd(jx_mm,mp2_mm,mp1_mm);
2142 t_sum__Z = _mm_macc_sd(iy_mm,mp4_mm,t_sum__Z);
2143 t_sum__Z = _mm_macc_sd(iz_mm,mp6_mm,t_sum__Z);
2144 sum__Z = _mm_macc_sd(mm_s,t_sum__Z,sum__Z);
2145
2146 //3---------------------------------------------------------------
2147
2148
2149
2150 mp = &RTable[Hash1dRTableIndex(jxjy_hash, iz)];
2151 mp2 = mp+8;
2152
2153 mp_t1 = _mm_loadu_pd(mp + 1);
2154 mp_t2 = _mm_loadu_pd(mp2 + 1);
2155 mp_t3 = _mm_load_sd(mp + 4);
2156 mp_t4 = _mm_load_sd(mp2 + 4);
2157 mp_t5 = _mm_load_sd(mp + 6);
2158 mp_t6 = _mm_load_sd(mp2 + 6);
2159
2160 __m128d mm_sxsy = _mm_mul_pd(mm_sx,mm_sy);
2161 mm_s = _mm_mul_pd(mm_sxsy,mm_tz);
2162
2163 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2); // 11
2164 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2); // 22
2165
2166 mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4); // 44
2167 mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6); // 66
2168
2169 mp2 = mp2+8;
2170
2171 t_sum_X_Y = _mm_macc_pd(jx_mm,mp2_mm,mp1_mm);
2172 t_sum_X_Y = _mm_macc_pd(jy_mm,mp4_mm,t_sum_X_Y);
2173 t_sum_X_Y = _mm_macc_pd(iz_mm,mp6_mm,t_sum_X_Y);
2174 sum_X_Y = _mm_macc_pd(mm_s,t_sum_X_Y,sum_X_Y);
2175
2176
2177 mp1_mm = _mm_load_sd(mp2 + 1);
2178 mp2_mm = _mm_load_sd(mp2 + 2);
2179 mp4_mm = _mm_load_sd(mp2 + 4);
2180 mp6_mm = _mm_load_sd(mp2 + 6);
2181
2182 t_sum__Z = _mm_macc_sd(jx_mm,mp2_mm,mp1_mm);
2183 t_sum__Z = _mm_macc_sd(jy_mm,mp4_mm,t_sum__Z);
2184 t_sum__Z = _mm_macc_sd(iz_mm,mp6_mm,t_sum__Z);
2185 sum__Z = _mm_macc_sd(mm_s,t_sum__Z,sum__Z);
2186
2187 //4-------------------------------------------------------------------
2188 mp = &RTable[Hash1dRTableIndex(ixjy_hash, iz)];
2189 mp2 = mp+8;
2190
2191 mp_t1 = _mm_loadu_pd(mp + 1);
2192 mp_t2 = _mm_loadu_pd(mp2 + 1);
2193 mp_t3 = _mm_load_sd(mp + 4);
2194 mp_t4 = _mm_load_sd(mp2 + 4);
2195 mp_t5 = _mm_load_sd(mp + 6);
2196 mp_t6 = _mm_load_sd(mp2 + 6);
2197
2198 __m128d mm_txsy = _mm_mul_pd(mm_tx,mm_sy);
2199 mm_s = _mm_mul_pd(mm_txsy,mm_tz);
2200
2201 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2); // 11
2202 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2); // 22
2203
2204 mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4); // 44
2205 mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6); // 66
2206 mp2 = mp2+8;
2207
2208 t_sum_X_Y = _mm_macc_pd(ix_mm,mp2_mm,mp1_mm);
2209 t_sum_X_Y = _mm_macc_pd(jy_mm,mp4_mm,t_sum_X_Y);
2210 t_sum_X_Y = _mm_macc_pd(iz_mm,mp6_mm,t_sum_X_Y);
2211 sum_X_Y = _mm_macc_pd(mm_s,t_sum_X_Y,sum_X_Y);
2212
2213
2214 mp1_mm = _mm_load_sd(mp2 + 1);
2215 mp2_mm = _mm_load_sd(mp2 + 2);
2216 mp4_mm = _mm_load_sd(mp2 + 4);
2217 mp6_mm = _mm_load_sd(mp2 + 6);
2218
2219 t_sum__Z = _mm_macc_sd(ix_mm,mp2_mm,mp1_mm);
2220 t_sum__Z = _mm_macc_sd(jy_mm,mp4_mm,t_sum__Z);
2221 t_sum__Z = _mm_macc_sd(iz_mm,mp6_mm,t_sum__Z);
2222 sum__Z = _mm_macc_sd(mm_s,t_sum__Z,sum__Z);
2223 //5-----------------------------------------------------
2224 mp = &RTable[Hash1dRTableIndex(ixjy_hash, iz + 1)];
2225 mp2 = mp+8;
2226
2227 mp_t1 = _mm_loadu_pd(mp + 1);
2228 mp_t2 = _mm_loadu_pd(mp2 + 1);
2229 mp_t3 = _mm_load_sd(mp + 4);
2230 mp_t4 = _mm_load_sd(mp2 + 4);
2231 mp_t5 = _mm_load_sd(mp + 6);
2232 mp_t6 = _mm_load_sd(mp2 + 6);
2233
2234 mm_s = _mm_mul_pd(mm_txsy,mm_sz);
2235
2236 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2); // 11
2237 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2); // 22
2238
2239 mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4); // 44
2240 mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6); // 66
2241 mp2 = mp2+8;
2242
2243 t_sum_X_Y = _mm_macc_pd(ix_mm,mp2_mm,mp1_mm);
2244 t_sum_X_Y = _mm_macc_pd(jy_mm,mp4_mm,t_sum_X_Y);
2245 t_sum_X_Y = _mm_macc_pd(jz_mm,mp6_mm,t_sum_X_Y);
2246 sum_X_Y = _mm_macc_pd(mm_s,t_sum_X_Y,sum_X_Y);
2247
2248
2249 mp1_mm = _mm_load_sd(mp2 + 1);
2250 mp2_mm = _mm_load_sd(mp2 + 2);
2251 mp4_mm = _mm_load_sd(mp2 + 4);
2252 mp6_mm = _mm_load_sd(mp2 + 6);
2253
2254 t_sum__Z = _mm_macc_sd(ix_mm,mp2_mm,mp1_mm);
2255 t_sum__Z = _mm_macc_sd(jy_mm,mp4_mm,t_sum__Z);
2256 t_sum__Z = _mm_macc_sd(jz_mm,mp6_mm,t_sum__Z);
2257 sum__Z = _mm_macc_sd(mm_s,t_sum__Z,sum__Z);
2258
2259 //6------------------
2260 mp = &RTable[Hash1dRTableIndex(jxjy_hash, iz + 1)];
2261 mp2 = mp+8;
2262
2263 mp_t1 = _mm_loadu_pd(mp + 1);
2264 mp_t2 = _mm_loadu_pd(mp2 + 1);
2265 mp_t3 = _mm_load_sd(mp + 4);
2266 mp_t4 = _mm_load_sd(mp2 + 4);
2267 mp_t5 = _mm_load_sd(mp + 6);
2268 mp_t6 = _mm_load_sd(mp2 + 6);
2269
2270 mm_s = _mm_mul_pd(mm_sxsy,mm_sz);
2271
2272 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2); // 11
2273 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2); // 22
2274
2275 mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4); // 44
2276 mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6); // 66
2277
2278 mp2 = mp2+8;
2279
2280 t_sum_X_Y = _mm_macc_pd(jx_mm,mp2_mm,mp1_mm);
2281 t_sum_X_Y = _mm_macc_pd(jy_mm,mp4_mm,t_sum_X_Y);
2282 t_sum_X_Y = _mm_macc_pd(jz_mm,mp6_mm,t_sum_X_Y);
2283 sum_X_Y = _mm_macc_pd(mm_s,t_sum_X_Y,sum_X_Y);
2284
2285
2286 mp1_mm = _mm_load_sd(mp2 + 1);
2287 mp2_mm = _mm_load_sd(mp2 + 2);
2288 mp4_mm = _mm_load_sd(mp2 + 4);
2289 mp6_mm = _mm_load_sd(mp2 + 6);
2290
2291 t_sum__Z = _mm_macc_sd(jx_mm,mp2_mm,mp1_mm);
2292 t_sum__Z = _mm_macc_sd(jy_mm,mp4_mm,t_sum__Z);
2293 t_sum__Z = _mm_macc_sd(jz_mm,mp6_mm,t_sum__Z);
2294 sum__Z = _mm_macc_sd(mm_s,t_sum__Z,sum__Z);
2295
2296 //7-----------
2297 mp = &RTable[Hash1dRTableIndex(jxiy_hash, iz + 1)];
2298 mp2 = mp+8;
2299
2300 mp_t1 = _mm_loadu_pd(mp + 1);
2301 mp_t2 = _mm_loadu_pd(mp2 + 1);
2302 mp_t3 = _mm_load_sd(mp + 4);
2303 mp_t4 = _mm_load_sd(mp2 + 4);
2304 mp_t5 = _mm_load_sd(mp + 6);
2305 mp_t6 = _mm_load_sd(mp2 + 6);
2306
2307 mm_s = _mm_mul_pd(mm_sxty,mm_sz);
2308
2309 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2); // 11
2310 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2); // 22
2311
2312 mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4); // 44
2313 mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6); // 66
2314 mp2 = mp2+8;
2315 t_sum_X_Y = _mm_macc_pd(jx_mm,mp2_mm,mp1_mm);
2316 t_sum_X_Y = _mm_macc_pd(iy_mm,mp4_mm,t_sum_X_Y);
2317 t_sum_X_Y = _mm_macc_pd(jz_mm,mp6_mm,t_sum_X_Y);
2318 sum_X_Y = _mm_macc_pd(mm_s,t_sum_X_Y,sum_X_Y);
2319
2320
2321 mp1_mm = _mm_load_sd(mp2 + 1);
2322 mp2_mm = _mm_load_sd(mp2 + 2);
2323 mp4_mm = _mm_load_sd(mp2 + 4);
2324 mp6_mm = _mm_load_sd(mp2 + 6);
2325
2326 t_sum__Z = _mm_macc_sd(jx_mm,mp2_mm,mp1_mm);
2327 t_sum__Z = _mm_macc_sd(iy_mm,mp4_mm,t_sum__Z);
2328 t_sum__Z = _mm_macc_sd(jz_mm,mp6_mm,t_sum__Z);
2329 sum__Z = _mm_macc_sd(mm_s,t_sum__Z,sum__Z);
2330
2331 //8--------------
2332
2333
2334 mp = &RTable[Hash1dRTableIndex(ixiy_hash, iz + 1)];
2335 mp2 = mp+8;
2336
2337 mp_t1 = _mm_loadu_pd(mp + 1);
2338 mp_t2 = _mm_loadu_pd(mp2 + 1);
2339 mp_t3 = _mm_load_sd(mp + 4);
2340 mp_t4 = _mm_load_sd(mp2 + 4);
2341 mp_t5 = _mm_load_sd(mp + 6);
2342 mp_t6 = _mm_load_sd(mp2 + 6);
2343
2344 mm_s = _mm_mul_pd(mm_txty,mm_sz);
2345
2346 mp1_mm = _mm_unpacklo_pd(mp_t1,mp_t2); // 11
2347 mp2_mm= _mm_unpackhi_pd(mp_t1,mp_t2); // 22
2348
2349 mp4_mm = _mm_unpacklo_pd(mp_t3,mp_t4); // 44
2350 mp6_mm = _mm_unpacklo_pd(mp_t5,mp_t6); // 66
2351
2352 mp2 = mp2+8;
2353
2354 t_sum_X_Y = _mm_macc_pd(ix_mm,mp2_mm,mp1_mm);
2355 t_sum_X_Y = _mm_macc_pd(iy_mm,mp4_mm,t_sum_X_Y);
2356 t_sum_X_Y = _mm_macc_pd(jz_mm,mp6_mm,t_sum_X_Y);
2357 sum_X_Y = _mm_macc_pd(mm_s,t_sum_X_Y,sum_X_Y);
2358
2359
2360 mp1_mm = _mm_load_sd(mp2 + 1);
2361 mp2_mm = _mm_load_sd(mp2 + 2);
2362 mp4_mm = _mm_load_sd(mp2 + 4);
2363 mp6_mm = _mm_load_sd(mp2 + 6);
2364
2365 t_sum__Z = _mm_macc_sd(ix_mm,mp2_mm,mp1_mm);
2366 t_sum__Z = _mm_macc_sd(iy_mm,mp4_mm,t_sum__Z);
2367 t_sum__Z = _mm_macc_sd(jz_mm,mp6_mm,t_sum__Z);
2368 sum__Z = _mm_macc_sd(mm_s,t_sum__Z,sum__Z);
2369
2370 _mm_storeu_pd(result,sum_X_Y);
2371
2372 _mm_store_sd(&result[Z],sum__Z);
2373
2374 }
2375
2376 #endif
2377
2378
2379 }
2380