1 // Copyright Contributors to the Open Shading Language project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
4
5 // clang-format off
6
7 #pragma once
8
9 #include <limits>
10 #include <type_traits>
11
12 #include <OpenImageIO/fmath.h>
13 #include <OpenImageIO/hash.h>
14 #include <OpenImageIO/simd.h>
15
16 #include <OSL/dual.h>
17 #include <OSL/dual_vec.h>
18 #include <OSL/sfm_simplex.h>
19 #include <OSL/sfmath.h>
20
21
22 OSL_NAMESPACE_ENTER
23
24
25 namespace oslnoise {
26
27 /////////////////////////////////////////////////////////////////////////
28 //
29 // Simple public API for computing the same noise functions that you get
30 // from OSL shaders.
31 //
32 // These match the properties of OSL noises that have the same names,
33 // please see the OSL specification for more detailed descriptions, we
34 // won't recapitulate it here.
35 //
36 // For the sake of compactness, we express the noise functions as
37 // templates for a either one or two domain parameters, which may be:
38 // (float) // 1-D domain noise, 1-argument variety
39 // (float, float) // 2-D domain noise, 2-argument variety
40 // (const Vec3 &) // 3-D domain noise, 1-argument variety
41 // (const Vec3 &, float) // 4-D domain noise, 2-argument variety
42 // And the range type may be
43 // float noisename () // float-valued noise
44 // Vec3 vnoisename() // vector-valued noise
45 // Note that in OSL we can overload function calls by return type, but we
46 // can't in C++, so we prepend a "v" in front of the names of functions
47 // that return vector-valued noise.
48 //
49 /////////////////////////////////////////////////////////////////////////
50
51 // Signed Perlin-like noise on 1-4 dimensional domain, range [-1,1].
52 template <typename S > OSL_HOSTDEVICE float snoise (S x);
53 template <typename S, typename T> OSL_HOSTDEVICE float snoise (S x, T y);
54 template <typename S > OSL_HOSTDEVICE Vec3 vsnoise (S x);
55 template <typename S, typename T> OSL_HOSTDEVICE Vec3 vsnoise (S x, T y);
56
57 // Unsigned Perlin-like noise on 1-4 dimensional domain, range [0,1].
58 template <typename S > OSL_HOSTDEVICE float noise (S x);
59 template <typename S, typename T> OSL_HOSTDEVICE float noise (S x, T y);
60 template <typename S > OSL_HOSTDEVICE Vec3 vnoise (S x);
61 template <typename S, typename T> OSL_HOSTDEVICE Vec3 vnoise (S x, T y);
62
63 // Cell noise on 1-4 dimensional domain, range [0,1].
64 // cellnoise is constant within each unit cube (cell) on the domain, but
65 // discontinuous at integer boundaries (and uncorrelated from cell to
66 // cell).
67 template <typename S > OSL_HOSTDEVICE float cellnoise (S x);
68 template <typename S, typename T> OSL_HOSTDEVICE float cellnoise (S x, T y);
69 template <typename S > OSL_HOSTDEVICE Vec3 vcellnoise (S x);
70 template <typename S, typename T> OSL_HOSTDEVICE Vec3 vcellnoise (S x, T y);
71
72 // Hash noise on 1-4 dimensional domain, range [0,1].
73 // hashnoise is like cellnoise, but without the 'floor' -- in other words,
74 // it's an uncorrelated hash that is different for every floating point
75 // value.
76 template <typename S > OSL_HOSTDEVICE float hashnoise (S x);
77 template <typename S, typename T> OSL_HOSTDEVICE float hashnoise (S x, T y);
78 template <typename S > OSL_HOSTDEVICE Vec3 vhashnoise (S x);
79 template <typename S, typename T> OSL_HOSTDEVICE Vec3 vhashnoise (S x, T y);
80
81 // FIXME -- eventually consider adding to the public API:
82 // * periodic varieties
83 // * varieties with derivatives
84 // * varieties that take/return simd::float3 rather than Imath::Vec3f.
85 // * exposing the simplex & gabor varieties
86
87
88 } // namespace oslnoise
89
90
91
92 ///////////////////////////////////////////////////////////////////////
93 // Implementation follows...
94 //
95 // Users don't need to worry about this part
96 ///////////////////////////////////////////////////////////////////////
97
98 struct NoiseParams;
99
100 namespace pvt {
101 using namespace OIIO::simd;
102 using namespace OIIO::bjhash;
103
104 typedef void (*NoiseGenericFunc)(int outdim, float *out, bool derivs,
105 int indim, const float *in,
106 const float *period, NoiseParams *params);
107 typedef void (*NoiseImplFunc)(float *out, const float *in,
108 const float *period, NoiseParams *params);
109
110 OSLNOISEPUBLIC OSL_HOSTDEVICE
111 float simplexnoise1 (float x, int seed=0, float *dnoise_dx=NULL);
112
113 OSLNOISEPUBLIC OSL_HOSTDEVICE
114 float simplexnoise2 (float x, float y, int seed=0,
115 float *dnoise_dx=NULL, float *dnoise_dy=NULL);
116
117 OSLNOISEPUBLIC OSL_HOSTDEVICE
118 float simplexnoise3 (float x, float y, float z, int seed=0,
119 float *dnoise_dx=NULL, float *dnoise_dy=NULL,
120 float *dnoise_dz=NULL);
121
122 OSLNOISEPUBLIC OSL_HOSTDEVICE
123 float simplexnoise4 (float x, float y, float z, float w, int seed=0,
124 float *dnoise_dx=NULL, float *dnoise_dy=NULL,
125 float *dnoise_dz=NULL, float *dnoise_dw=NULL);
126
127
128 namespace {
129
130 // convert a 32 bit integer into a floating point number in [0,1]
bits_to_01(unsigned int bits)131 inline OSL_HOSTDEVICE float bits_to_01 (unsigned int bits) {
132 // divide by 2^32-1
133 // Calculate inverse constant with double precision to avoid
134 // warning: implicit conversion from 'unsigned int' to 'float' changes value from 4294967295 to 4294967296
135 constexpr float convertFactor = static_cast<float>(static_cast<double>(1.0) / static_cast<double>(std::numeric_limits<unsigned int>::max()));
136 return bits * convertFactor;
137 }
138
139
140 #ifndef __CUDA_ARCH__
141 // Perform a bjmix (see OpenImageIO/hash.h) on 4 sets of values at once.
142 OSL_FORCEINLINE void
bjmix(int4 & a,int4 & b,int4 & c)143 bjmix (int4 &a, int4 &b, int4 &c)
144 {
145 using OIIO::simd::rotl32;
146 a -= c; a ^= rotl32(c, 4); c += b;
147 b -= a; b ^= rotl32(a, 6); a += c;
148 c -= b; c ^= rotl32(b, 8); b += a;
149 a -= c; a ^= rotl32(c,16); c += b;
150 b -= a; b ^= rotl32(a,19); a += c;
151 c -= b; c ^= rotl32(b, 4); b += a;
152 }
153
154 // Perform a bjfinal (see OpenImageIO/hash.h) on 4 sets of values at once.
155 OSL_FORCEINLINE int4
bjfinal(const int4 & a_,const int4 & b_,const int4 & c_)156 bjfinal (const int4& a_, const int4& b_, const int4& c_)
157 {
158 using OIIO::simd::rotl32;
159 int4 a(a_), b(b_), c(c_);
160 c ^= b; c -= rotl32(b,14);
161 a ^= c; a -= rotl32(c,11);
162 b ^= a; b -= rotl32(a,25);
163 c ^= b; c -= rotl32(b,16);
164 a ^= c; a -= rotl32(c,4);
165 b ^= a; b -= rotl32(a,14);
166 c ^= b; c -= rotl32(b,24);
167 return c;
168 }
169 #endif
170
171 #ifndef __OSL_USE_REFERENCE_INT_HASH
172 // Warning the reference hash may cause incorrect results when
173 // used inside a SIMD loop due to its complexity
174 #define __OSL_USE_REFERENCE_INT_HASH 0
175 #endif
176
177 /// hash an array of N 32 bit values into a pseudo-random value
178 /// based on my favorite hash: http://burtleburtle.net/bob/c/lookup3.c
179 /// templated so that the compiler can unroll the loops for us
180 template <int N>
181 OSL_FORCEINLINE OSL_HOSTDEVICE unsigned int
reference_inthash(const unsigned int k[N])182 reference_inthash (const unsigned int k[N]) {
183 // now hash the data!
184 unsigned int a, b, c, len = N;
185 a = b = c = 0xdeadbeef + (len << 2) + 13;
186 while (len > 3) {
187 a += k[0];
188 b += k[1];
189 c += k[2];
190 OIIO::bjhash::bjmix(a, b, c);
191 len -= 3;
192 k += 3;
193 }
194 switch (len) {
195 case 3 : c += k[2];
196 case 2 : b += k[1];
197 case 1 : a += k[0];
198 c = OIIO::bjhash::bjfinal(a, b, c);
199 case 0:
200 break;
201 }
202 return c;
203 }
204
205 #if (__OSL_USE_REFERENCE_INT_HASH == 0)
206 // Do not rely on compilers to fully optimizing the
207 // reference_inthash<int N> template above.
208 // The fact it takes an array parameter
209 // could cause confusion and extra work for a compiler to convert
210 // from x,y,z,w -> k[4] then index them. So to simplify things
211 // for compilers and avoid requiring actual stack space for the k[N]
212 // array versus tracking builtin integer types in registers,
213 // here are hand unrolled versions of inthash for 1-5 parameters.
214 // The upshot is no need to create a k[N] on the stack and hopefully
215 // simpler time for optimizer as it has no need to deal with while
216 // loop and switch statement.
217 OSL_FORCEINLINE OSL_HOSTDEVICE unsigned int
inthash(const unsigned int k0)218 inthash (const unsigned int k0) {
219 // now hash the data!
220 unsigned int start_val = 0xdeadbeef + (1 << 2) + 13;
221
222 unsigned int a = start_val + k0;
223 unsigned int c = OIIO::bjhash::bjfinal(a, start_val, start_val);
224 return c;
225 }
226
227 OSL_FORCEINLINE OSL_HOSTDEVICE unsigned int
inthash(const unsigned int k0,const unsigned int k1)228 inthash (const unsigned int k0, const unsigned int k1) {
229 // now hash the data!
230 unsigned int start_val = 0xdeadbeef + (2 << 2) + 13;
231
232 unsigned int a = start_val + k0;
233 unsigned int b = start_val + k1;
234 unsigned int c = OIIO::bjhash::bjfinal(a, b, start_val);
235 return c;
236 }
237
238 OSL_FORCEINLINE OSL_HOSTDEVICE unsigned int
inthash(const unsigned int k0,const unsigned int k1,const unsigned int k2)239 inthash (const unsigned int k0, const unsigned int k1, const unsigned int k2) {
240 // now hash the data!
241 unsigned int start_val = 0xdeadbeef + (3 << 2) + 13;
242
243 unsigned int a = start_val + k0;
244 unsigned int b = start_val + k1;
245 unsigned int c = start_val + k2;
246 c = OIIO::bjhash::bjfinal(a, b, c);
247 return c;
248 }
249
250 OSL_FORCEINLINE OSL_HOSTDEVICE unsigned int
inthash(const unsigned int k0,const unsigned int k1,const unsigned int k2,const unsigned int k3)251 inthash (const unsigned int k0, const unsigned int k1, const unsigned int k2, const unsigned int k3) {
252 // now hash the data!
253 unsigned int start_val = 0xdeadbeef + (4 << 2) + 13;
254
255 unsigned int a = start_val + k0;
256 unsigned int b = start_val + k1;
257 unsigned int c = start_val + k2;
258 OIIO::bjhash::bjmix(a, b, c);
259 a += k3;
260 c = OIIO::bjhash::bjfinal(a, b, c);
261 return c;
262 }
263
264 OSL_FORCEINLINE OSL_HOSTDEVICE unsigned int
inthash(const unsigned int k0,const unsigned int k1,const unsigned int k2,const unsigned int k3,const unsigned int k4)265 inthash (const unsigned int k0, const unsigned int k1, const unsigned int k2, const unsigned int k3, const unsigned int k4) {
266 // now hash the data!
267 unsigned int start_val = 0xdeadbeef + (5 << 2) + 13;
268
269 unsigned int a = start_val + k0;
270 unsigned int b = start_val + k1;
271 unsigned int c = start_val + k2;
272 OIIO::bjhash::bjmix(a, b, c);
273 b += k4;
274 a += k3;
275 c = OIIO::bjhash::bjfinal(a, b, c);
276 return c;
277 }
278 #endif
279
280 #ifndef __CUDA_ARCH__
281 // Do four 2D hashes simultaneously.
282 inline int4
inthash_simd(const int4 & key_x,const int4 & key_y)283 inthash_simd (const int4& key_x, const int4& key_y)
284 {
285 const int len = 2;
286 const int seed_ = (0xdeadbeef + (len << 2) + 13);
287 static const OIIO_SIMD4_ALIGN int seed[4] = { seed_,seed_,seed_,seed_};
288 int4 a = (*(int4*)&seed)+key_x, b = (*(int4*)&seed)+key_y, c = (*(int4*)&seed);
289 return bjfinal (a, b, c);
290 }
291
292
293 // Do four 3D hashes simultaneously.
294 inline int4
inthash_simd(const int4 & key_x,const int4 & key_y,const int4 & key_z)295 inthash_simd (const int4& key_x, const int4& key_y, const int4& key_z)
296 {
297 const int len = 3;
298 const int seed_ = (0xdeadbeef + (len << 2) + 13);
299 static const OIIO_SIMD4_ALIGN int seed[4] = { seed_,seed_,seed_,seed_};
300 int4 a = (*(int4*)&seed)+key_x, b = (*(int4*)&seed)+key_y, c = (*(int4*)&seed)+key_z;
301 return bjfinal (a, b, c);
302 }
303
304
305
306 // Do four 3D hashes simultaneously.
307 inline int4
inthash_simd(const int4 & key_x,const int4 & key_y,const int4 & key_z,const int4 & key_w)308 inthash_simd (const int4& key_x, const int4& key_y, const int4& key_z, const int4& key_w)
309 {
310 const int len = 4;
311 const int seed_ = (0xdeadbeef + (len << 2) + 13);
312 static const OIIO_SIMD4_ALIGN int seed[4] = { seed_,seed_,seed_,seed_};
313 int4 a = (*(int4*)&seed)+key_x, b = (*(int4*)&seed)+key_y, c = (*(int4*)&seed)+key_z;
314 bjmix (a, b, c);
315 a += key_w;
316 return bjfinal(a, b, c);
317 }
318 #endif
319
320
321 // Cell and Hash noise only differ in how they transform their inputs from
322 // float to unsigned int for use in the inthash function.
323 // IntHashNoiseBase serves as base class with DerivedT::transformToUint
324 // controlling how the float inputs are transformed
325 template<typename DerivedT>
326 struct IntHashNoiseBase {
IntHashNoiseBaseIntHashNoiseBase327 OSL_FORCEINLINE OSL_HOSTDEVICE IntHashNoiseBase () { }
328
operatorIntHashNoiseBase329 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, float x) const {
330 result = hashFloat(x);
331 }
332
operatorIntHashNoiseBase333 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, float x, float y) const {
334 result = hashFloat(x, y);
335 }
336
operatorIntHashNoiseBase337 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p) const {
338 result = hashFloat(p.x, p.y, p.z);
339 }
340
operatorIntHashNoiseBase341 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p, float t) const {
342 result = hashFloat(p.x, p.y, p.z, t);
343 }
344
345
operatorIntHashNoiseBase346 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x) const {
347 result = hashVec(x);
348 }
349
operatorIntHashNoiseBase350 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x, float y) const {
351 result = hashVec(x, y);
352 }
353
operatorIntHashNoiseBase354 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p) const {
355 result = hashVec(p.x, p.y, p.z);
356 }
357
operatorIntHashNoiseBase358 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p, float t) const {
359 result = hashVec(p.x, p.y, p.z, t);
360 }
361
362
363 private:
364 template<typename ...ListT>
365 OSL_FORCEINLINE OSL_HOSTDEVICE float
hashFloatIntHashNoiseBase366 hashFloat (ListT... floatList) const {
367 return bits_to_01(inthash(DerivedT::transformToUint(floatList)...));
368 }
369
370 template<typename ...ListT>
371 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3
hashVecIntHashNoiseBase372 hashVec (ListT... floatList) const {
373 return inthashVec(DerivedT::transformToUint(floatList)...);
374 }
375
376 #if __OSL_USE_REFERENCE_INT_HASH
377 // Allow any number of arguments to be adapted to the array based reference_inthash
378 // and leave door open for overloads of hash3 with specific parameters
379
380 // Produce Vec3 result from any number of arguments with array based reference_inthash
381 // and extra seed values, but leave door open for overloads of inthashVec
382 // for specific parameter combinations
383 template<typename ...ListT>
384 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3
inthashVecIntHashNoiseBase385 inthashVec (ListT... uintList) const {
386 constexpr int N = sizeof...(uintList) + 1;
387 unsigned int k[N] = {uintList...};
388
389 Vec3 result;
390 k[N-1] = 0u; result.x = bits_to_01 (reference_inthash<N> (k));
391 k[N-1] = 1u; result.y = bits_to_01 (reference_inthash<N> (k));
392 k[N-1] = 2u; result.z = bits_to_01 (reference_inthash<N> (k));
393 return result;
394 }
395 #else
396 // Produce Vec3 result from any number of arguments with inthash and
397 // extra seed values, but leave door open for overloads of inthashVec
398 // for specific parameter combinations
399 template<typename ...ListT>
400 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3
inthashVecIntHashNoiseBase401 inthashVec (ListT... uintList) const {
402 return Vec3(bits_to_01(inthash(uintList..., 0u)),
403 bits_to_01(inthash(uintList..., 1u)),
404 bits_to_01(inthash(uintList..., 2u)));
405 }
406
407 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3
inthashVecIntHashNoiseBase408 inthashVec (const unsigned int k0, const unsigned int k1, const unsigned int k2) const {
409 // combine implementations of reference_inthash<3> and reference_inthash<4>
410 // so that we can only perform the bjmix once because k0,k1,k2 are the
411 // same values passed to reference_inthash<4>, only k3 differs
412 // and if we unroll the work it can be separated
413
414 // now hash the data!
415 unsigned int start_val = 0xdeadbeef + (4 << 2) + 13;
416
417 unsigned int a = start_val + k0;
418 unsigned int b = start_val + k1;
419 unsigned int c = start_val + k2;
420 OIIO::bjhash::bjmix(a, b, c);
421
422 return Vec3(bits_to_01( OIIO::bjhash::bjfinal(a+0, b, c) ),
423 bits_to_01( OIIO::bjhash::bjfinal(a+1, b, c) ),
424 bits_to_01( OIIO::bjhash::bjfinal(a+2, b, c) ));
425 }
426
427 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3
inthashVecIntHashNoiseBase428 inthashVec (const unsigned int k0, const unsigned int k1, const unsigned int k2, const unsigned int k3) const {
429 // combine implementations of reference_inthash<3> and reference_inthash<5>
430 // so that we can only perform the bjmix once because k0,k1,k2,k3 are the
431 // same values passed to reference_inthash<5>, only k4 differs
432 // and if we unroll the work it can be separated
433
434 // now hash the data!
435 unsigned int start_val = 0xdeadbeef + (5 << 2) + 13;
436
437 unsigned int a = start_val + k0;
438 unsigned int b = start_val + k1;
439 unsigned int c = start_val + k2;
440 OIIO::bjhash::bjmix(a, b, c);
441 unsigned int a2 = a + k3;
442
443 return Vec3(bits_to_01( OIIO::bjhash::bjfinal(a2, b+0, c) ),
444 bits_to_01( OIIO::bjhash::bjfinal(a2, b+1, c) ),
445 bits_to_01( OIIO::bjhash::bjfinal(a2, b+2, c) ));
446 }
447 #endif
448 };
449
450 struct CellNoise: public IntHashNoiseBase<CellNoise> {
CellNoiseCellNoise451 OSL_FORCEINLINE OSL_HOSTDEVICE CellNoise () { }
452
453 static OSL_FORCEINLINE OSL_HOSTDEVICE unsigned int
transformToUintCellNoise454 transformToUint(float val)
455 {
456 return OIIO::ifloor(val);
457 }
458 };
459
460 struct HashNoise: public IntHashNoiseBase<HashNoise> {
HashNoiseHashNoise461 OSL_FORCEINLINE OSL_HOSTDEVICE HashNoise () { }
462
463 static OSL_FORCEINLINE OSL_HOSTDEVICE unsigned int
transformToUintHashNoise464 transformToUint(float val)
465 {
466 return OIIO::bit_cast<float,unsigned int>(val);
467 }
468 };
469
470 // Periodic Cell and Hash Noise simply wraps its inputs before
471 // performing the same conversions and hashing as the non-periodic version.
472 // We define a wrapper on top of Cell or Hash Noise to reuse
473 // its underlying implementation.
474 template <typename BaseNoiseT>
475 struct PeriodicAdaptionOf {
PeriodicAdaptionOfPeriodicAdaptionOf476 OSL_FORCEINLINE OSL_HOSTDEVICE PeriodicAdaptionOf () { }
477
operatorPeriodicAdaptionOf478 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, float x, float px) const {
479 m_impl(result, wrap (x, px));
480 }
481
operatorPeriodicAdaptionOf482 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, float x, float y,
483 float px, float py) const {
484 m_impl(result, wrap (x, px),
485 wrap (y, py));
486 }
487
operatorPeriodicAdaptionOf488 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p,
489 const Vec3 &pp) const {
490 m_impl(result, wrap (p, pp));
491 }
492
operatorPeriodicAdaptionOf493 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p, float t,
494 const Vec3 &pp, float tt) const {
495 m_impl(result, wrap (p, pp),
496 wrap (t, tt));
497 }
498
operatorPeriodicAdaptionOf499 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x, float px) const {
500 m_impl(result, wrap (x, px));
501 }
502
operatorPeriodicAdaptionOf503 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x, float y,
504 float px, float py) const {
505 m_impl(result, wrap (x, px),
506 wrap (y, py));
507 }
508
operatorPeriodicAdaptionOf509 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p, const Vec3 &pp) const {
510 m_impl(result, wrap (p, pp));
511 }
512
operatorPeriodicAdaptionOf513 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p, float t,
514 const Vec3 &pp, float tt) const {
515 m_impl(result, wrap (p, pp),
516 wrap (t, tt));
517 }
518
519 private:
wrapPeriodicAdaptionOf520 OSL_FORCEINLINE OSL_HOSTDEVICE float wrap (float s, float period) const {
521 // TODO: investigate if quick_floor is legal here
522 // and or beneficial
523 period = floorf (period);
524 if (period < 1.0f)
525 period = 1.0f;
526 return s - period * floorf (s / period);
527 }
528
wrapPeriodicAdaptionOf529 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3 wrap (const Vec3 &s, const Vec3 &period) const {
530 return Vec3(wrap(s.x, period.x),
531 wrap(s.y, period.y),
532 wrap(s.z, period.z));
533 }
534
535 BaseNoiseT m_impl;
536 };
537
538 using PeriodicCellNoise = PeriodicAdaptionOf<CellNoise>;
539 using PeriodicHashNoise = PeriodicAdaptionOf<HashNoise>;
540
541
542
543
544 inline OSL_HOSTDEVICE int
inthashi(int x)545 inthashi (int x)
546 {
547 return static_cast<int>(inthash(
548 static_cast<unsigned int>(x)
549 ));
550 }
551
552 inline OSL_HOSTDEVICE int
inthashf(float x)553 inthashf (float x)
554 {
555 return static_cast<int>(
556 inthash(OIIO::bit_cast<float,unsigned int>(x))
557 );
558 }
559
560 inline OSL_HOSTDEVICE int
inthashf(float x,float y)561 inthashf (float x, float y)
562 {
563 return static_cast<int>(
564 inthash(
565 OIIO::bit_cast<float,unsigned int>(x),
566 OIIO::bit_cast<float,unsigned int>(y)
567 )
568 );
569 }
570
571
572 inline OSL_HOSTDEVICE int
inthashf(const float * x)573 inthashf (const float *x)
574 {
575 return static_cast<int>(
576 inthash(
577 OIIO::bit_cast<float,unsigned int>(x[0]),
578 OIIO::bit_cast<float,unsigned int>(x[1]),
579 OIIO::bit_cast<float,unsigned int>(x[2])
580 )
581 );
582 }
583
584
585 inline OSL_HOSTDEVICE int
inthashf(const float * x,float y)586 inthashf (const float *x, float y)
587 {
588 return static_cast<int>(
589 inthash(
590 OIIO::bit_cast<float,unsigned int>(x[0]),
591 OIIO::bit_cast<float,unsigned int>(x[1]),
592 OIIO::bit_cast<float,unsigned int>(x[2]),
593 OIIO::bit_cast<float,unsigned int>(y)
594 )
595 );
596 }
597
598
599 // Define select(bool,truevalue,falsevalue) template that works for a
600 // variety of types that we can use for both scalars and vectors. Because ?:
601 // won't work properly in template code with vector ops.
602 // NOTE: Removing template and require explicit functions for different
603 // combinations be specified using polymorphism. Main reason is so that
604 // different versions can pass parameters by value vs. reference.
605 //template <typename B, typename F>
606 //OSL_FORCEINLINE OSL_HOSTDEVICE F select (B b, const F& t, const F& f) { return b ? t : f; }
607
select(const bool b,float t,float f)608 OSL_FORCEINLINE OSL_HOSTDEVICE float select(const bool b, float t, float f) {
609 // NOTE: parameters must NOT be references, to avoid inlining caller's creation of
610 // these values down inside the conditional assignments which can create to complex
611 // of a code flow for Clang's vectorizer to handle
612 static_assert(!std::is_reference<decltype(b)>::value, "parameters to select cannot be references");
613 static_assert(!std::is_reference<decltype(t)>::value, "parameters to select cannot be references");
614 static_assert(!std::is_reference<decltype(f)>::value, "parameters to select cannot be references");
615 return b ? t : f;
616 }
617
select(const bool b,const Dual2<float> & t,const Dual2<float> & f)618 OSL_FORCEINLINE OSL_HOSTDEVICE Dual2<float> select(const bool b, const Dual2<float> &t, const Dual2<float> &f) {
619 // Because t & f are a references, we don't want inlining to expand t.val(), t.dx(), or t.dy()
620 // inside the conditional branch creating a more complex flow by forcing the inlined
621 // abstract syntax tree to only be evaluated when the condition is true.
622 // To avoid this we assign t.val(), t.dx(), t.dy() to local values outside the
623 // conditional to insulate it.
624 // NOTE: This technique was also necessary to enable vectorization of nested select calls
625 float val(f.val());
626 float tval(t.val());
627
628 float dx(f.dx());
629 float tdx(t.dx());
630
631 float dy(f.dy());
632 float tdy(t.dy());
633
634 // Blend per builtin component to allow
635 // the compiler to track builtins and privatize the data layout
636 // versus requiring a stack location.
637 // Without this work per component, gathers & scatters were being emitted
638 // when used inside SIMD loops.
639 #if OSL_NON_INTEL_CLANG
640 // Clang's vectorizor was really insistent that a select operation could not be replaced
641 // with control flow, so had to re-introduce the ? operator to make it happy
642 return Dual2<float> (
643 b ? tval : val,
644 b ? tdx : dx,
645 b ? tdy : dy);
646 #else
647 if (b)
648 val = tval;
649
650 if (b)
651 dx = tdx;
652
653 if (b)
654 dy = tdy;
655
656 return Dual2<float> (
657 val,
658 dx,
659 dy);
660 #endif
661 }
662
663 #ifndef __CUDA_ARCH__
664 // Already provided by oiio/simd.h
665 //OSL_FORCEINLINE int4 select (const bool4& b, const int4& t, const int4& f) {
666 // return blend (f, t, b);
667 //}
668
669 // Already provided by oiio/simd.h
670 //OSL_FORCEINLINE float4 select (const bool4& b, const float4& t, const float4& f) {
671 // return blend (f, t, b);
672 //}
673
select(const int4 & b,const float4 & t,const float4 & f)674 OSL_FORCEINLINE float4 select (const int4& b, const float4& t, const float4& f) {
675 return blend (f, t, bool4(b));
676 }
677
678 OSL_FORCEINLINE Dual2<float4>
select(const bool4 & b,const Dual2<float4> & t,const Dual2<float4> & f)679 select (const bool4& b, const Dual2<float4>& t, const Dual2<float4>& f) {
680 return Dual2<float4> (blend (f.val(), t.val(), b),
681 blend (f.dx(), t.dx(), b),
682 blend (f.dy(), t.dy(), b));
683 }
684
select(const int4 & b,const Dual2<float4> & t,const Dual2<float4> & f)685 OSL_FORCEINLINE Dual2<float4> select (const int4& b, const Dual2<float4>& t, const Dual2<float4>& f) {
686 return select (bool4(b), t, f);
687 }
688 #endif
689
690
691
692 // Define negate_if(value,bool) that will work for both scalars and vectors,
693 // as well as Dual2's of both.
694 // NOTE: Removing template and require explicit functions for different
695 // combinations be specified using polymorphism. Main reason is so that
696 // different versions can pass parameters by value vs. reference.
697 //template <typename B, typename F>
698 //template<typename FLOAT, typename BOOL>
699 //OSL_FORCEINLINE OSL_HOSTDEVICE FLOAT negate_if (const FLOAT& val, const BOOL& b) {
700 // return b ? -val : val;
701 //}
702
negate_if(const float val,const bool cond)703 OSL_FORCEINLINE OSL_HOSTDEVICE float negate_if (const float val, const bool cond) {
704 // NOTE: parameters must NOT be references, to avoid inlining caller's creation of
705 // these values down inside the conditional assignments which can create to complex
706 // of a code flow for vectorizer to handle
707 static_assert(!std::is_reference<decltype(val)>::value, "parameters to negate_if cannot be references");
708 return cond ? -val : val;
709 }
710
711
negate_if(const Dual2<float> & val,const bool cond)712 OSL_FORCEINLINE OSL_HOSTDEVICE Dual2<float> negate_if(const Dual2<float> &val, const bool cond) {
713 // NOTE: negation happens outside of conditional, then is blended based on the condition
714 Dual2<float> neg_val(-val);
715
716 // Blend per builtin component to allow
717 // the compiler to track builtins and privatize the data layout
718 // versus requiring a stack location.
719 float v = val.val();
720 if (cond) {
721 v = neg_val.val();
722 }
723
724 float dx = val.dx();
725 if (cond) {
726 dx = neg_val.dx();
727 }
728
729 float dy = val.dy();
730 if (cond) {
731 dy = neg_val.dy();
732 }
733
734 return Dual2<float>(v, dx, dy);
735 }
736
737 #if 0 // Unneeded currently
738 template <> OSL_FORCEINLINE Dual2<Vec3> negate_if(const Dual2<Vec3> &val, const bool &b) {
739 Dual2<Vec3> r;
740 // Use a ternary operation per builtin component to allow
741 // the compiler to track builtins and privatize the data layout
742 // versus requiring a stack location.
743 Dual2<Vec3> neg_val(-val);
744 r.val() = b ? neg_val.val() : val.val();
745 r.dx() = b ? neg_val.dx() : val.dx();
746 r.dy() = b ? neg_val.dy() : val.dy();
747 return r;
748 }
749 #endif
750
751 #ifndef __CUDA_ARCH__
negate_if(const float4 & val,const int4 & b)752 OSL_FORCEINLINE float4 negate_if (const float4& val, const int4& b) {
753 // Special case negate_if for SIMD -- can do it with bit tricks, no branches
754 int4 highbit (0x80000000);
755 return bitcast_to_float4 (bitcast_to_int4(val) ^ (blend0 (highbit, bool4(b))));
756 }
757
758 // Special case negate_if for SIMD -- can do it with bit tricks, no branches
negate_if(const Dual2<float4> & val,const int4 & b)759 OSL_FORCEINLINE Dual2<float4> negate_if (const Dual2<float4>& val, const int4& b)
760 {
761 return Dual2<float4> (negate_if (val.val(), b),
762 negate_if (val.dx(), b),
763 negate_if (val.dy(), b));
764 }
765 #endif
766
767
768 #ifndef __CUDA_ARCH__
769 // Define shuffle<> template that works with Dual2<float4> analogously to
770 // how it works for float4.
771 template<int i0, int i1, int i2, int i3>
shuffle(const Dual2<float4> & a)772 OSL_FORCEINLINE Dual2<float4> shuffle (const Dual2<float4>& a)
773 {
774 return Dual2<float4> (OIIO::simd::shuffle<i0,i1,i2,i3>(a.val()),
775 OIIO::simd::shuffle<i0,i1,i2,i3>(a.dx()),
776 OIIO::simd::shuffle<i0,i1,i2,i3>(a.dy()));
777 }
778
779 template<int i>
shuffle(const Dual2<float4> & a)780 OSL_FORCEINLINE Dual2<float4> shuffle (const Dual2<float4>& a)
781 {
782 return Dual2<float4> (OIIO::simd::shuffle<i>(a.val()),
783 OIIO::simd::shuffle<i>(a.dx()),
784 OIIO::simd::shuffle<i>(a.dy()));
785 }
786
787 // Define extract<> that works with Dual2<float4> analogously to how it
788 // works for float4.
789 template<int i>
extract(const Dual2<float4> & a)790 OSL_FORCEINLINE Dual2<float> extract (const Dual2<float4>& a)
791 {
792 return Dual2<float> (OIIO::simd::extract<i>(a.val()),
793 OIIO::simd::extract<i>(a.dx()),
794 OIIO::simd::extract<i>(a.dy()));
795 }
796 #endif
797
798
799
800 // Equivalent to OIIO::bilerp (a, b, c, d, u, v), but if abcd are already
801 // packed into a float4. We assume T is float and VECTYPE is float4,
802 // but it also works if T is Dual2<float> and VECTYPE is Dual2<float4>.
803 template<typename T, typename VECTYPE>
bilerp(VECTYPE abcd,T u,T v)804 OSL_FORCEINLINE OSL_HOSTDEVICE T bilerp (VECTYPE abcd, T u, T v) {
805 VECTYPE xx = OIIO::lerp (abcd, OIIO::simd::shuffle<1,1,3,3>(abcd), u);
806 return OIIO::simd::extract<0>(OIIO::lerp (xx,OIIO::simd::shuffle<2>(xx), v));
807 }
808
809 #ifndef __CUDA_ARCH__
810 // Equivalent to OIIO::bilerp (a, b, c, d, u, v), but if abcd are already
811 // packed into a float4 and uv are already packed into the first two
812 // elements of a float4. We assume VECTYPE is float4, but it also works if
813 // VECTYPE is Dual2<float4>.
bilerp(const Dual2<float4> & abcd,const Dual2<float4> & uv)814 OSL_FORCEINLINE Dual2<float> bilerp (const Dual2<float4>& abcd, const Dual2<float4>& uv) {
815 Dual2<float4> xx = OIIO::lerp (abcd, shuffle<1,1,3,3>(abcd), shuffle<0>(uv));
816 return extract<0>(OIIO::lerp (xx,shuffle<2>(xx), shuffle<1>(uv)));
817 }
818
819
820 // Equivalent to OIIO::trilerp (a, b, c, d, e, f, g, h, u, v, w), but if
821 // abcd and efgh are already packed into float4's and uvw are packed into
822 // the first 3 elements of a float4.
trilerp(const float4 & abcd,const float4 & efgh,const float4 & uvw)823 OSL_FORCEINLINE float trilerp (const float4& abcd, const float4& efgh, const float4& uvw) {
824 // Interpolate along z axis by w
825 float4 xy = OIIO::lerp (abcd, efgh, OIIO::simd::shuffle<2>(uvw));
826 // Interpolate along x axis by u
827 float4 xx = OIIO::lerp (xy, OIIO::simd::shuffle<1,1,3,3>(xy), OIIO::simd::shuffle<0>(uvw));
828 // interpolate along y axis by v
829 return OIIO::simd::extract<0>(OIIO::lerp (xx, OIIO::simd::shuffle<2>(xx), OIIO::simd::shuffle<1>(uvw)));
830 }
831 #endif
832
833
834
835 // always return a value inside [0,b) - even for negative numbers
imod(int a,int b)836 OSL_FORCEINLINE OSL_HOSTDEVICE int imod(int a, int b) {
837 #if 0
838 a %= b;
839 return a < 0 ? a + b : a;
840 #else
841 // Avoid confusing vectorizor by returning a single value
842 int remainder = a % b;
843 if (remainder < 0) {
844 remainder += b;
845 }
846 return remainder;
847 #endif
848 }
849
850 #ifndef __CUDA_ARCH__
851 // imod four values at once
imod(const int4 & a,int b)852 inline int4 imod(const int4& a, int b) {
853 int4 c = a % b;
854 return c + select(c < 0, int4(b), int4::Zero());
855 }
856 #endif
857
858 // floorfrac return ifloor as well as the fractional remainder
859 // FIXME: already implemented inside OIIO but can't easily override it for duals
860 // inside a different namespace
floorfrac(float x,int * i)861 OSL_FORCEINLINE OSL_HOSTDEVICE float floorfrac(float x, int* i) {
862 *i = OIIO::ifloor(x);
863 return x - *i;
864 }
865
866 // floorfrac with derivs
floorfrac(const Dual2<float> & x,int * i)867 OSL_FORCEINLINE OSL_HOSTDEVICE Dual2<float> floorfrac(const Dual2<float> &x, int* i) {
868 float frac = floorfrac(x.val(), i);
869 // slope of x is not affected by this operation
870 return Dual2<float>(frac, x.dx(), x.dy());
871 }
872
873 #ifndef __CUDA_ARCH__
874 // floatfrac for four sets of values at once.
floorfrac(const float4 & x,int4 * i)875 inline float4 floorfrac(const float4& x, int4 * i) {
876 #if 0
877 float4 thefloor = floor(x);
878 *i = int4(thefloor);
879 return x-thefloor;
880 #else
881 int4 thefloor = OIIO::simd::ifloor (x);
882 *i = thefloor;
883 return x - float4(thefloor);
884 #endif
885 }
886
887 // floorfrac with derivs, computed on 4 values at once.
floorfrac(const Dual2<float4> & x,int4 * i)888 inline Dual2<float4> floorfrac(const Dual2<float4> &x, int4* i) {
889 float4 frac = floorfrac(x.val(), i);
890 // slope of x is not affected by this operation
891 return Dual2<float4>(frac, x.dx(), x.dy());
892 }
893 #endif
894
895
896 // Perlin 'fade' function. Can be overloaded for float, Dual2, as well
897 // as float4 / Dual2<float4>.
898 template <typename T>
fade(const T & t)899 OSL_HOSTDEVICE OSL_FORCEINLINE T fade (const T &t) {
900 return t * t * t * (t * (t * T(6.0f) - T(15.0f)) + T(10.0f));
901 }
902
903
904
905 // 1,2,3 and 4 dimensional gradient functions - perform a dot product against a
906 // randomly chosen vector. Note that the gradient vector is not normalized, but
907 // this only affects the overall "scale" of the result, so we simply account for
908 // the scale by multiplying in the corresponding "perlin" function.
909 // These factors were experimentally calculated to be:
910 // 1D: 0.188
911 // 2D: 0.507
912 // 3D: 0.936
913 // 4D: 0.870
914
915 template <typename T>
grad(int hash,const T & x)916 OSL_FORCEINLINE OSL_HOSTDEVICE T grad (int hash, const T &x) {
917 int h = hash & 15;
918 float g = 1 + (h & 7); // 1, 2, .., 8
919 if (h&8) g = -g; // random sign
920 return g * x; // dot-product
921 }
922
923 template <typename I, typename T>
grad(const I & hash,const T & x,const T & y)924 OSL_FORCEINLINE OSL_HOSTDEVICE T grad (const I &hash, const T &x, const T &y) {
925 // 8 possible directions (+-1,+-2) and (+-2,+-1)
926 I h = hash & 7;
927 T u = select (h<4, x, y);
928 T v = 2.0f * select (h<4, y, x);
929 // compute the dot product with (x,y).
930 return negate_if(u, h&1) + negate_if(v, h&2);
931 }
932
933 template <typename I, typename T>
grad(const I & hash,const T & x,const T & y,const T & z)934 OSL_FORCEINLINE OSL_HOSTDEVICE T grad (const I &hash, const T &x, const T &y, const T &z) {
935 // use vectors pointing to the edges of the cube
936 I h = hash & 15;
937 T u = select (h<8, x, y);
938 T v = select (h<4, y, select ((h==I(12))|(h==I(14)), x, z));
939 return negate_if(u,h&1) + negate_if(v,h&2);
940 }
941
942 template <typename T>
grad(const int hash,const T & x,const T & y,const T & z)943 OSL_FORCEINLINE OSL_HOSTDEVICE T grad (const int hash, const T &x, const T &y, const T &z) {
944 // use vectors pointing to the edges of the cube
945 int h = hash & 15;
946 T u = select (h<8, x, y);
947 // Changed from bitwise | to logical || to avoid conversions
948 // to integer from native boolean that was causing gather + scatters
949 // to be generated when used with the select vs.
950 // simple masking or blending.
951 // TODO: couldn't change the grad version above because OpenImageIO::v1_7::simd::bool4
952 // has no || operator defined. Would be preferable to implement bool4::operator||
953 // and not have this version of grad separate
954 T v = select (h<4, y, select ((h==int(12))||(h==int(14)), x, z));
955 return negate_if(u,h&1) + negate_if(v,h&2);
956 }
957
958 template <typename I, typename T>
grad(const I & hash,const T & x,const T & y,const T & z,const T & w)959 OSL_FORCEINLINE OSL_HOSTDEVICE T grad (const I &hash, const T &x, const T &y, const T &z, const T &w) {
960 // use vectors pointing to the edges of the hypercube
961 I h = hash & 31;
962 T u = select (h<24, x, y);
963 T v = select (h<16, y, z);
964 T s = select (h<8 , z, w);
965 return negate_if(u,h&1) + negate_if(v,h&2) + negate_if(s,h&4);
966 }
967
968 typedef Imath::Vec3<int> Vec3i;
969
grad(const Vec3i & hash,float x)970 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3 grad (const Vec3i &hash, float x) {
971 return Vec3 (grad (hash.x, x),
972 grad (hash.y, x),
973 grad (hash.z, x));
974 }
975
grad(const Vec3i & hash,Dual2<float> x)976 OSL_FORCEINLINE OSL_HOSTDEVICE Dual2<Vec3> grad (const Vec3i &hash, Dual2<float> x) {
977 Dual2<float> rx = grad (hash.x, x);
978 Dual2<float> ry = grad (hash.y, x);
979 Dual2<float> rz = grad (hash.z, x);
980 return make_Vec3 (rx, ry, rz);
981 }
982
983
grad(const Vec3i & hash,float x,float y)984 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3 grad (const Vec3i &hash, float x, float y) {
985 return Vec3 (grad (hash.x, x, y),
986 grad (hash.y, x, y),
987 grad (hash.z, x, y));
988 }
989
grad(const Vec3i & hash,Dual2<float> x,Dual2<float> y)990 OSL_FORCEINLINE OSL_HOSTDEVICE Dual2<Vec3> grad (const Vec3i &hash, Dual2<float> x, Dual2<float> y) {
991 Dual2<float> rx = grad (hash.x, x, y);
992 Dual2<float> ry = grad (hash.y, x, y);
993 Dual2<float> rz = grad (hash.z, x, y);
994 return make_Vec3 (rx, ry, rz);
995 }
996
grad(const Vec3i & hash,float x,float y,float z)997 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3 grad (const Vec3i &hash, float x, float y, float z) {
998 return Vec3 (grad (hash.x, x, y, z),
999 grad (hash.y, x, y, z),
1000 grad (hash.z, x, y, z));
1001 }
1002
grad(const Vec3i & hash,Dual2<float> x,Dual2<float> y,Dual2<float> z)1003 OSL_FORCEINLINE OSL_HOSTDEVICE Dual2<Vec3> grad (const Vec3i &hash, Dual2<float> x, Dual2<float> y, Dual2<float> z) {
1004 Dual2<float> rx = grad (hash.x, x, y, z);
1005 Dual2<float> ry = grad (hash.y, x, y, z);
1006 Dual2<float> rz = grad (hash.z, x, y, z);
1007 return make_Vec3 (rx, ry, rz);
1008 }
1009
grad(const Vec3i & hash,float x,float y,float z,float w)1010 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3 grad (const Vec3i &hash, float x, float y, float z, float w) {
1011 return Vec3 (grad (hash.x, x, y, z, w),
1012 grad (hash.y, x, y, z, w),
1013 grad (hash.z, x, y, z, w));
1014 }
1015
grad(const Vec3i & hash,Dual2<float> x,Dual2<float> y,Dual2<float> z,Dual2<float> w)1016 OSL_FORCEINLINE OSL_HOSTDEVICE Dual2<Vec3> grad (const Vec3i &hash, Dual2<float> x, Dual2<float> y, Dual2<float> z, Dual2<float> w) {
1017 Dual2<float> rx = grad (hash.x, x, y, z, w);
1018 Dual2<float> ry = grad (hash.y, x, y, z, w);
1019 Dual2<float> rz = grad (hash.z, x, y, z, w);
1020 return make_Vec3 (rx, ry, rz);
1021 }
1022
1023 template <typename T>
scale1(const T & result)1024 OSL_FORCEINLINE OSL_HOSTDEVICE T scale1 (const T &result) { return 0.2500f * result; }
1025 template <typename T>
scale2(const T & result)1026 OSL_FORCEINLINE OSL_HOSTDEVICE T scale2 (const T &result) { return 0.6616f * result; }
1027 template <typename T>
scale3(const T & result)1028 OSL_FORCEINLINE OSL_HOSTDEVICE T scale3 (const T &result) { return 0.9820f * result; }
1029 template <typename T>
scale4(const T & result)1030 OSL_FORCEINLINE OSL_HOSTDEVICE T scale4 (const T &result) { return 0.8344f * result; }
1031
1032
1033 struct HashScalar {
1034
operatorHashScalar1035 OSL_FORCEINLINE OSL_HOSTDEVICE int operator() (int x) const {
1036 return static_cast<int>(
1037 inthash(static_cast<unsigned int>(x))
1038 );
1039 }
1040
operatorHashScalar1041 OSL_FORCEINLINE OSL_HOSTDEVICE int operator() (int x, int y) const {
1042 return static_cast<int>(
1043 inthash(static_cast<unsigned int>(x),
1044 static_cast<unsigned int>(y))
1045 );
1046 }
1047
operatorHashScalar1048 OSL_FORCEINLINE OSL_HOSTDEVICE int operator() (int x, int y, int z) const {
1049 return static_cast<int>(
1050 inthash(static_cast<unsigned int>(x),
1051 static_cast<unsigned int>(y),
1052 static_cast<unsigned int>(z))
1053 );
1054 }
1055
operatorHashScalar1056 OSL_FORCEINLINE OSL_HOSTDEVICE int operator() (int x, int y, int z, int w) const {
1057 return static_cast<int>(
1058 inthash(static_cast<unsigned int>(x),
1059 static_cast<unsigned int>(y),
1060 static_cast<unsigned int>(z),
1061 static_cast<unsigned int>(w))
1062 );
1063 }
1064
1065 #ifndef __CUDA_ARCH__
1066 // 4 2D hashes at once!
operatorHashScalar1067 OSL_FORCEINLINE int4 operator() (const int4& x, const int4& y) const {
1068 return inthash_simd (x, y);
1069 }
1070
1071 // 4 3D hashes at once!
operatorHashScalar1072 OSL_FORCEINLINE int4 operator() (const int4& x, const int4& y, const int4& z) const {
1073 return inthash_simd (x, y, z);
1074 }
1075
1076 // 4 3D hashes at once!
operatorHashScalar1077 OSL_FORCEINLINE int4 operator() (const int4& x, const int4& y, const int4& z, const int4& w) const {
1078 return inthash_simd (x, y, z, w);
1079 }
1080 #endif
1081
1082 };
1083
1084
sliceup(unsigned int h)1085 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3i sliceup (unsigned int h) {
1086 // we only need the low-order bits to be random, so split out
1087 // the 32 bit result into 3 parts for each channel
1088 return Vec3i(
1089 (h ) & 0xFF,
1090 (h >> 8 ) & 0xFF,
1091 (h >> 16) & 0xFF);
1092 }
1093
1094 struct HashVector {
convertToScalarHashVector1095 static OSL_FORCEINLINE OSL_HOSTDEVICE HashScalar convertToScalar() { return HashScalar(); }
1096
operatorHashVector1097 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3i operator() (int x) const {
1098 return sliceup(inthash(static_cast<unsigned int>(x)));
1099 }
1100
operatorHashVector1101 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3i operator() (int x, int y) const {
1102 return sliceup(inthash(static_cast<unsigned int>(x),
1103 static_cast<unsigned int>(y)));
1104 }
1105
operatorHashVector1106 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3i operator() (int x, int y, int z) const {
1107 return sliceup(inthash(static_cast<unsigned int>(x),
1108 static_cast<unsigned int>(y),
1109 static_cast<unsigned int>(z)));
1110 }
operatorHashVector1111 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3i operator() (int x, int y, int z, int w) const {
1112 return sliceup(inthash(static_cast<unsigned int>(x),
1113 static_cast<unsigned int>(y),
1114 static_cast<unsigned int>(z),
1115 static_cast<unsigned int>(w)));
1116 }
1117
1118 #ifndef __CUDA_ARCH__
1119 // Vector hash of 4 3D points at once
operatorHashVector1120 OSL_FORCEINLINE void operator() (int4 *result, const int4& x, const int4& y) const {
1121 int4 h = inthash_simd (x, y);
1122 result[0] = (h ) & 0xFF;
1123 result[1] = (srl(h,8 )) & 0xFF;
1124 result[2] = (srl(h,16)) & 0xFF;
1125 }
1126
1127 // Vector hash of 4 3D points at once
operatorHashVector1128 OSL_FORCEINLINE void operator() (int4 *result, const int4& x, const int4& y, const int4& z) const {
1129 int4 h = inthash_simd (x, y, z);
1130 result[0] = (h ) & 0xFF;
1131 result[1] = (srl(h,8 )) & 0xFF;
1132 result[2] = (srl(h,16)) & 0xFF;
1133 }
1134
1135 // Vector hash of 4 3D points at once
operatorHashVector1136 OSL_FORCEINLINE void operator() (int4 *result, const int4& x, const int4& y, const int4& z, const int4& w) const {
1137 int4 h = inthash_simd (x, y, z, w);
1138 result[0] = (h ) & 0xFF;
1139 result[1] = (srl(h,8 )) & 0xFF;
1140 result[2] = (srl(h,16)) & 0xFF;
1141 }
1142 #endif
1143
1144 };
1145
1146
1147 struct HashScalarPeriodic {
1148 private:
1149 friend struct HashVectorPeriodic;
HashScalarPeriodicHashScalarPeriodic1150 OSL_FORCEINLINE OSL_HOSTDEVICE HashScalarPeriodic () {}
1151 public:
HashScalarPeriodicHashScalarPeriodic1152 OSL_FORCEINLINE OSL_HOSTDEVICE HashScalarPeriodic (float px) {
1153 m_px = OIIO::ifloor(px); if (m_px < 1) m_px = 1;
1154 }
HashScalarPeriodicHashScalarPeriodic1155 OSL_FORCEINLINE OSL_HOSTDEVICE HashScalarPeriodic (float px, float py) {
1156 m_px = OIIO::ifloor(px); if (m_px < 1) m_px = 1;
1157 m_py = OIIO::ifloor(py); if (m_py < 1) m_py = 1;
1158 }
HashScalarPeriodicHashScalarPeriodic1159 OSL_FORCEINLINE OSL_HOSTDEVICE HashScalarPeriodic (float px, float py, float pz) {
1160 m_px = OIIO::ifloor(px); if (m_px < 1) m_px = 1;
1161 m_py = OIIO::ifloor(py); if (m_py < 1) m_py = 1;
1162 m_pz = OIIO::ifloor(pz); if (m_pz < 1) m_pz = 1;
1163 }
HashScalarPeriodicHashScalarPeriodic1164 OSL_FORCEINLINE OSL_HOSTDEVICE HashScalarPeriodic (float px, float py, float pz, float pw) {
1165 m_px = OIIO::ifloor(px); if (m_px < 1) m_px = 1;
1166 m_py = OIIO::ifloor(py); if (m_py < 1) m_py = 1;
1167 m_pz = OIIO::ifloor(pz); if (m_pz < 1) m_pz = 1;
1168 m_pw = OIIO::ifloor(pw); if (m_pw < 1) m_pw = 1;
1169 }
1170
1171 int m_px, m_py, m_pz, m_pw;
1172
operatorHashScalarPeriodic1173 OSL_FORCEINLINE OSL_HOSTDEVICE int operator() (int x) const {
1174 return static_cast<int>(
1175 inthash (static_cast<unsigned int>(imod (x, m_px)))
1176 );
1177 }
1178
operatorHashScalarPeriodic1179 OSL_FORCEINLINE OSL_HOSTDEVICE int operator() (int x, int y) const {
1180 return static_cast<int>(
1181 inthash(static_cast<unsigned int>(imod (x, m_px)),
1182 static_cast<unsigned int>(imod (y, m_py)))
1183 );
1184 }
1185
operatorHashScalarPeriodic1186 OSL_FORCEINLINE OSL_HOSTDEVICE int operator() (int x, int y, int z) const {
1187 return static_cast<int>(
1188 inthash(static_cast<unsigned int>(imod (x, m_px)),
1189 static_cast<unsigned int>(imod (y, m_py)),
1190 static_cast<unsigned int>(imod (z, m_pz)))
1191 );
1192 }
1193
operatorHashScalarPeriodic1194 OSL_FORCEINLINE OSL_HOSTDEVICE int operator() (int x, int y, int z, int w) const {
1195 return static_cast<int>(
1196 inthash(static_cast<unsigned int>(imod (x, m_px)),
1197 static_cast<unsigned int>(imod (y, m_py)),
1198 static_cast<unsigned int>(imod (z, m_pz)),
1199 static_cast<unsigned int>(imod (w, m_pw)))
1200 );
1201 }
1202
1203 #ifndef __CUDA_ARCH__
1204 // 4 2D hashes at once!
operatorHashScalarPeriodic1205 int4 operator() (const int4& x, const int4& y) const {
1206 return inthash_simd (imod(x,m_px), imod(y,m_py));
1207 }
1208
1209 // 4 3D hashes at once!
operatorHashScalarPeriodic1210 int4 operator() (const int4& x, const int4& y, const int4& z) const {
1211 return inthash_simd (imod(x,m_px), imod(y,m_py), imod(z,m_pz));
1212 }
1213
1214 // 4 4D hashes at once
operatorHashScalarPeriodic1215 int4 operator() (const int4& x, const int4& y, const int4& z, const int4& w) const {
1216 return inthash_simd (imod(x,m_px), imod(y,m_py), imod(z,m_pz), imod(w,m_pw));
1217 }
1218 #endif
1219
1220 };
1221
1222 struct HashVectorPeriodic {
HashVectorPeriodicHashVectorPeriodic1223 OSL_FORCEINLINE OSL_HOSTDEVICE HashVectorPeriodic (float px) {
1224 m_px = OIIO::ifloor(px); if (m_px < 1) m_px = 1;
1225 }
HashVectorPeriodicHashVectorPeriodic1226 OSL_FORCEINLINE OSL_HOSTDEVICE HashVectorPeriodic (float px, float py) {
1227 m_px = OIIO::ifloor(px); if (m_px < 1) m_px = 1;
1228 m_py = OIIO::ifloor(py); if (m_py < 1) m_py = 1;
1229 }
HashVectorPeriodicHashVectorPeriodic1230 OSL_FORCEINLINE OSL_HOSTDEVICE HashVectorPeriodic (float px, float py, float pz) {
1231 m_px = OIIO::ifloor(px); if (m_px < 1) m_px = 1;
1232 m_py = OIIO::ifloor(py); if (m_py < 1) m_py = 1;
1233 m_pz = OIIO::ifloor(pz); if (m_pz < 1) m_pz = 1;
1234 }
HashVectorPeriodicHashVectorPeriodic1235 OSL_FORCEINLINE OSL_HOSTDEVICE HashVectorPeriodic (float px, float py, float pz, float pw) {
1236 m_px = OIIO::ifloor(px); if (m_px < 1) m_px = 1;
1237 m_py = OIIO::ifloor(py); if (m_py < 1) m_py = 1;
1238 m_pz = OIIO::ifloor(pz); if (m_pz < 1) m_pz = 1;
1239 m_pw = OIIO::ifloor(pw); if (m_pw < 1) m_pw = 1;
1240 }
1241
1242 int m_px, m_py, m_pz, m_pw;
1243
convertToScalarHashVectorPeriodic1244 OSL_FORCEINLINE OSL_HOSTDEVICE HashScalarPeriodic convertToScalar() const {
1245 HashScalarPeriodic r;
1246 r.m_px = m_px;
1247 r.m_py = m_py;
1248 r.m_pz = m_pz;
1249 r.m_pw = m_pw;
1250 return r;
1251 }
1252
operatorHashVectorPeriodic1253 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3i operator() (int x) const {
1254 return sliceup(inthash(static_cast<unsigned int>(imod (x, m_px))));
1255 }
1256
operatorHashVectorPeriodic1257 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3i operator() (int x, int y) const {
1258 return sliceup(inthash(static_cast<unsigned int>(imod (x, m_px)),
1259 static_cast<unsigned int>(imod (y, m_py))));
1260 }
1261
operatorHashVectorPeriodic1262 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3i operator() (int x, int y, int z) const {
1263 return sliceup(inthash(static_cast<unsigned int>(imod (x, m_px)),
1264 static_cast<unsigned int>(imod (y, m_py)),
1265 static_cast<unsigned int>(imod (z, m_pz))));
1266 }
1267
operatorHashVectorPeriodic1268 OSL_FORCEINLINE OSL_HOSTDEVICE Vec3i operator() (int x, int y, int z, int w) const {
1269 return sliceup(inthash(static_cast<unsigned int>(imod (x, m_px)),
1270 static_cast<unsigned int>(imod (y, m_py)),
1271 static_cast<unsigned int>(imod (z, m_pz)),
1272 static_cast<unsigned int>(imod (w, m_pw))));
1273 }
1274
1275 #ifndef __CUDA_ARCH__
1276 // Vector hash of 4 3D points at once
operatorHashVectorPeriodic1277 void operator() (int4 *result, const int4& x, const int4& y) const {
1278 int4 h = inthash_simd (imod(x,m_px), imod(y,m_py));
1279 result[0] = (h ) & 0xFF;
1280 result[1] = (srl(h,8 )) & 0xFF;
1281 result[2] = (srl(h,16)) & 0xFF;
1282 }
1283
1284 // Vector hash of 4 3D points at once
operatorHashVectorPeriodic1285 void operator() (int4 *result, const int4& x, const int4& y, const int4& z) const {
1286 int4 h = inthash_simd (imod(x,m_px), imod(y,m_py), imod(z,m_pz));
1287 result[0] = (h ) & 0xFF;
1288 result[1] = (srl(h,8 )) & 0xFF;
1289 result[2] = (srl(h,16)) & 0xFF;
1290 }
1291
1292 // Vector hash of 4 4D points at once
operatorHashVectorPeriodic1293 void operator() (int4 *result, const int4& x, const int4& y, const int4& z, const int4& w) const {
1294 int4 h = inthash_simd (imod(x,m_px), imod(y,m_py), imod(z,m_pz), imod(w,m_pw));
1295 result[0] = (h ) & 0xFF;
1296 result[1] = (srl(h,8 )) & 0xFF;
1297 result[2] = (srl(h,16)) & 0xFF;
1298 }
1299 #endif
1300 };
1301
1302 // Code Generation Policies
1303 // main intent is to allow top level classes to share implementation and carry
1304 // the policy down into helper functions who might diverge in implementation
1305 // or conditionally emit different code paths
1306 struct CGDefault
1307 {
1308 static constexpr bool allowSIMD = true;
1309 };
1310
1311 struct CGScalar
1312 {
1313 static constexpr bool allowSIMD = false;
1314 };
1315
1316 template <typename CGPolicyT = CGDefault, typename V, typename H, typename T>
perlin(V & result,H & hash,const T & x)1317 OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (V& result, H& hash, const T &x) {
1318 int X; T fx = floorfrac(x, &X);
1319 T u = fade(fx);
1320
1321 auto lerp_result = OIIO::lerp(grad (hash (X ), fx ),
1322 grad (hash (X+1), fx-1.0f), u);
1323 result = scale1 (lerp_result);
1324 }
1325
1326 template <typename CGPolicyT = CGDefault, typename H>
perlin(float & result,const H & hash,const float & x,const float & y)1327 OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (float &result, const H &hash, const float &x, const float &y)
1328 {
1329 #if OIIO_SIMD
1330 if (CGPolicyT::allowSIMD)
1331 {
1332 int4 XY;
1333 float4 fxy = floorfrac (float4(x,y,0.0f), &XY);
1334 float4 uv = fade(fxy); // Note: will be (fade(fx), fade(fy), 0, 0)
1335
1336 // We parallelize primarily by computing the hashes and gradients at the
1337 // integer lattice corners simultaneously.
1338 static const OIIO_SIMD4_ALIGN int i0101[4] = {0,1,0,1};
1339 static const OIIO_SIMD4_ALIGN int i0011[4] = {0,0,1,1};
1340 int4 cornerx = OIIO::simd::shuffle<0>(XY) + (*(int4*)i0101);
1341 int4 cornery = OIIO::simd::shuffle<1>(XY) + (*(int4*)i0011);
1342 int4 corner_hash = hash (cornerx, cornery);
1343 static const OIIO_SIMD4_ALIGN float f0101[4] = {0,1,0,1};
1344 static const OIIO_SIMD4_ALIGN float f0011[4] = {0,0,1,1};
1345 float4 remainderx = OIIO::simd::shuffle<0>(fxy) - (*(float4*)f0101);
1346 float4 remaindery = OIIO::simd::shuffle<1>(fxy) - (*(float4*)f0011);
1347 float4 corner_grad = grad (corner_hash, remainderx, remaindery);
1348 result = scale2 (bilerp (corner_grad, uv[0], uv[1]));
1349
1350 } else
1351 #endif
1352 {
1353 // ORIGINAL, non-SIMD
1354 int X; float fx = floorfrac(x, &X);
1355 int Y; float fy = floorfrac(y, &Y);
1356
1357 float u = fade(fx);
1358 float v = fade(fy);
1359
1360 float bilerp_result = OIIO::bilerp (grad (hash (X , Y ), fx , fy ),
1361 grad (hash (X+1, Y ), fx-1.0f, fy ),
1362 grad (hash (X , Y+1), fx , fy-1.0f),
1363 grad (hash (X+1, Y+1), fx-1.0f, fy-1.0f), u, v);
1364 result = scale2 (bilerp_result);
1365 }
1366 }
1367
1368 template <typename CGPolicyT = CGDefault, typename H>
perlin(float & result,const H & hash,const float & x,const float & y,const float & z)1369 OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (float &result, const H &hash,
1370 const float &x, const float &y, const float &z)
1371 {
1372 #if OIIO_SIMD
1373 if (CGPolicyT::allowSIMD)
1374 {
1375 #if 0
1376 // You'd think it would be faster to do the floorfrac in parallel, but
1377 // according to my timings, it is not. I don't understand exactly why.
1378 int4 XYZ;
1379 float4 fxyz = floorfrac (float4(x,y,z), &XYZ);
1380 float4 uvw = fade (fxyz);
1381
1382 // We parallelize primarily by computing the hashes and gradients at the
1383 // integer lattice corners simultaneously. We need 8 total (for 3D), so
1384 // we do two sets of 4. (Future opportunity to do all 8 simultaneously
1385 // with AVX.)
1386 static const OIIO_SIMD4_ALIGN int i0101[4] = {0,1,0,1};
1387 static const OIIO_SIMD4_ALIGN int i0011[4] = {0,0,1,1};
1388 int4 cornerx = shuffle<0>(XYZ) + (*(int4*)i0101);
1389 int4 cornery = shuffle<1>(XYZ) + (*(int4*)i0011);
1390 int4 cornerz = shuffle<2>(XYZ);
1391 #else
1392 int X; float fx = floorfrac(x, &X);
1393 int Y; float fy = floorfrac(y, &Y);
1394 int Z; float fz = floorfrac(z, &Z);
1395 float4 fxyz (fx, fy, fz); // = floorfrac (xyz, &XYZ);
1396 float4 uvw = fade (fxyz);
1397
1398 // We parallelize primarily by computing the hashes and gradients at the
1399 // integer lattice corners simultaneously. We need 8 total (for 3D), so
1400 // we do two sets of 4. (Future opportunity to do all 8 simultaneously
1401 // with AVX.)
1402 static const OIIO_SIMD4_ALIGN int i0101[4] = {0,1,0,1};
1403 static const OIIO_SIMD4_ALIGN int i0011[4] = {0,0,1,1};
1404 int4 cornerx = X + (*(int4*)i0101);
1405 int4 cornery = Y + (*(int4*)i0011);
1406 int4 cornerz = Z;
1407 #endif
1408 int4 corner_hash_z0 = hash (cornerx, cornery, cornerz);
1409 int4 corner_hash_z1 = hash (cornerx, cornery, cornerz+int4::One());
1410
1411 static const OIIO_SIMD4_ALIGN float f0101[4] = {0,1,0,1};
1412 static const OIIO_SIMD4_ALIGN float f0011[4] = {0,0,1,1};
1413 float4 remainderx = OIIO::simd::shuffle<0>(fxyz) - (*(float4*)f0101);
1414 float4 remaindery = OIIO::simd::shuffle<1>(fxyz) - (*(float4*)f0011);
1415 float4 remainderz = OIIO::simd::shuffle<2>(fxyz);
1416 float4 corner_grad_z0 = grad (corner_hash_z0, remainderx, remaindery, remainderz);
1417 float4 corner_grad_z1 = grad (corner_hash_z1, remainderx, remaindery, remainderz-float4::One());
1418
1419 result = scale3 (trilerp (corner_grad_z0, corner_grad_z1, uvw));
1420 } else
1421 #endif
1422 {
1423 // ORIGINAL -- non-SIMD
1424 int X; float fx = floorfrac(x, &X);
1425 int Y; float fy = floorfrac(y, &Y);
1426 int Z; float fz = floorfrac(z, &Z);
1427 float u = fade(fx);
1428 float v = fade(fy);
1429 float w = fade(fz);
1430 float trilerp_result = OIIO::trilerp (grad (hash (X , Y , Z ), fx , fy , fz ),
1431 grad (hash (X+1, Y , Z ), fx-1.0f, fy , fz ),
1432 grad (hash (X , Y+1, Z ), fx , fy-1.0f, fz ),
1433 grad (hash (X+1, Y+1, Z ), fx-1.0f, fy-1.0f, fz ),
1434 grad (hash (X , Y , Z+1), fx , fy , fz-1.0f),
1435 grad (hash (X+1, Y , Z+1), fx-1.0f, fy , fz-1.0f),
1436 grad (hash (X , Y+1, Z+1), fx , fy-1.0f, fz-1.0f),
1437 grad (hash (X+1, Y+1, Z+1), fx-1.0f, fy-1.0f, fz-1.0f),
1438 u, v, w);
1439 result = scale3 (trilerp_result);
1440 }
1441 }
1442
1443
1444
1445 template <typename CGPolicyT = CGDefault, typename H>
perlin(float & result,const H & hash,const float & x,const float & y,const float & z,const float & w)1446 OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (float &result, const H &hash,
1447 const float &x, const float &y, const float &z, const float &w)
1448 {
1449 #if OIIO_SIMD
1450 if (CGPolicyT::allowSIMD)
1451 {
1452
1453 int4 XYZW;
1454 float4 fxyzw = floorfrac (float4(x,y,z,w), &XYZW);
1455 float4 uvts = fade (fxyzw);
1456
1457 // We parallelize primarily by computing the hashes and gradients at the
1458 // integer lattice corners simultaneously. We need 8 total (for 3D), so
1459 // we do two sets of 4. (Future opportunity to do all 8 simultaneously
1460 // with AVX.)
1461 static const OIIO_SIMD4_ALIGN int i0101[4] = {0,1,0,1};
1462 static const OIIO_SIMD4_ALIGN int i0011[4] = {0,0,1,1};
1463 int4 cornerx = OIIO::simd::shuffle<0>(XYZW) + (*(int4*)i0101);
1464 int4 cornery = OIIO::simd::shuffle<1>(XYZW) + (*(int4*)i0011);
1465 int4 cornerz = OIIO::simd::shuffle<2>(XYZW);
1466 int4 cornerz1 = cornerz + int4::One();
1467 int4 cornerw = OIIO::simd::shuffle<3>(XYZW);
1468
1469 int4 corner_hash_z0 = hash (cornerx, cornery, cornerz, cornerw);
1470 int4 corner_hash_z1 = hash (cornerx, cornery, cornerz1, cornerw);
1471 int4 cornerw1 = cornerw + int4::One();
1472 int4 corner_hash_z2 = hash (cornerx, cornery, cornerz, cornerw1);
1473 int4 corner_hash_z3 = hash (cornerx, cornery, cornerz1, cornerw1);
1474
1475 static const OIIO_SIMD4_ALIGN float f0101[4] = {0,1,0,1};
1476 static const OIIO_SIMD4_ALIGN float f0011[4] = {0,0,1,1};
1477 float4 remainderx = OIIO::simd::shuffle<0>(fxyzw) - (*(float4*)f0101);
1478 float4 remaindery = OIIO::simd::shuffle<1>(fxyzw) - (*(float4*)f0011);
1479 float4 remainderz = OIIO::simd::shuffle<2>(fxyzw);
1480 float4 remainderz1 = remainderz - float4::One();
1481 float4 remainderw = OIIO::simd::shuffle<3>(fxyzw);
1482 float4 corner_grad_z0 = grad (corner_hash_z0, remainderx, remaindery, remainderz, remainderw);
1483 float4 corner_grad_z1 = grad (corner_hash_z1, remainderx, remaindery, remainderz1, remainderw);
1484 float4 remainderw1 = remainderw - float4::One();
1485 float4 corner_grad_z2 = grad (corner_hash_z2, remainderx, remaindery, remainderz, remainderw1);
1486 float4 corner_grad_z3 = grad (corner_hash_z3, remainderx, remaindery, remainderz1, remainderw1);
1487
1488 result = scale4 (OIIO::lerp (trilerp (corner_grad_z0, corner_grad_z1, uvts),
1489 trilerp (corner_grad_z2, corner_grad_z3, uvts),
1490 OIIO::simd::extract<3>(uvts)));
1491 } else
1492 #endif
1493 {
1494 // ORIGINAL -- non-SIMD
1495 int X; float fx = floorfrac(x, &X);
1496 int Y; float fy = floorfrac(y, &Y);
1497 int Z; float fz = floorfrac(z, &Z);
1498 int W; float fw = floorfrac(w, &W);
1499
1500 float u = fade(fx);
1501 float v = fade(fy);
1502 float t = fade(fz);
1503 float s = fade(fw);
1504
1505 float lerp_result = OIIO::lerp (
1506 OIIO::trilerp (grad (hash (X , Y , Z , W ), fx , fy , fz , fw ),
1507 grad (hash (X+1, Y , Z , W ), fx-1.0f, fy , fz , fw ),
1508 grad (hash (X , Y+1, Z , W ), fx , fy-1.0f, fz , fw ),
1509 grad (hash (X+1, Y+1, Z , W ), fx-1.0f, fy-1.0f, fz , fw ),
1510 grad (hash (X , Y , Z+1, W ), fx , fy , fz-1.0f, fw ),
1511 grad (hash (X+1, Y , Z+1, W ), fx-1.0f, fy , fz-1.0f, fw ),
1512 grad (hash (X , Y+1, Z+1, W ), fx , fy-1.0f, fz-1.0f, fw ),
1513 grad (hash (X+1, Y+1, Z+1, W ), fx-1.0f, fy-1.0f, fz-1.0f, fw ),
1514 u, v, t),
1515 OIIO::trilerp (grad (hash (X , Y , Z , W+1), fx , fy , fz , fw-1.0f),
1516 grad (hash (X+1, Y , Z , W+1), fx-1.0f, fy , fz , fw-1.0f),
1517 grad (hash (X , Y+1, Z , W+1), fx , fy-1.0f, fz , fw-1.0f),
1518 grad (hash (X+1, Y+1, Z , W+1), fx-1.0f, fy-1.0f, fz , fw-1.0f),
1519 grad (hash (X , Y , Z+1, W+1), fx , fy , fz-1.0f, fw-1.0f),
1520 grad (hash (X+1, Y , Z+1, W+1), fx-1.0f, fy , fz-1.0f, fw-1.0f),
1521 grad (hash (X , Y+1, Z+1, W+1), fx , fy-1.0f, fz-1.0f, fw-1.0f),
1522 grad (hash (X+1, Y+1, Z+1, W+1), fx-1.0f, fy-1.0f, fz-1.0f, fw-1.0f),
1523 u, v, t),
1524 s);
1525 result = scale4 (lerp_result);
1526 }
1527 }
1528
1529 template <typename CGPolicyT = CGDefault, typename H>
perlin(Dual2<float> & result,const H & hash,const Dual2<float> & x,const Dual2<float> & y)1530 OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (Dual2<float> &result, const H &hash,
1531 const Dual2<float> &x, const Dual2<float> &y)
1532 {
1533 #if OIIO_SIMD
1534 if (CGPolicyT::allowSIMD)
1535 {
1536 int X; Dual2<float> fx = floorfrac(x, &X);
1537 int Y; Dual2<float> fy = floorfrac(y, &Y);
1538 Dual2<float4> fxy (float4(fx.val(), fy.val(), 0.0f),
1539 float4(fx.dx(), fy.dx(), 0.0f),
1540 float4(fx.dy(), fy.dy(), 0.0f));
1541 Dual2<float4> uv = fade (fxy);
1542
1543 // We parallelize primarily by computing the hashes and gradients at the
1544 // integer lattice corners simultaneously.
1545 static const OIIO_SIMD4_ALIGN int i0101[4] = {0,1,0,1};
1546 static const OIIO_SIMD4_ALIGN int i0011[4] = {0,0,1,1};
1547 int4 cornerx = X + (*(int4*)i0101);
1548 int4 cornery = Y + (*(int4*)i0011);
1549 int4 corner_hash = hash (cornerx, cornery);
1550
1551 static const OIIO_SIMD4_ALIGN float f0101[4] = {0,1,0,1};
1552 static const OIIO_SIMD4_ALIGN float f0011[4] = {0,0,1,1};
1553 Dual2<float4> remainderx = shuffle<0>(fxy) - (*(float4*)f0101);
1554 Dual2<float4> remaindery = shuffle<1>(fxy) - (*(float4*)f0011);
1555 Dual2<float4> corner_grad = grad (corner_hash, remainderx, remaindery);
1556
1557 result = scale2 (bilerp (corner_grad, uv));
1558 } else
1559 #endif
1560 {
1561 // Non-SIMD case
1562 int X; Dual2<float> fx = floorfrac(x, &X);
1563 int Y; Dual2<float> fy = floorfrac(y, &Y);
1564 Dual2<float> u = fade(fx);
1565 Dual2<float> v = fade(fy);
1566 auto bilerp_result = OIIO::bilerp (grad (hash (X , Y ), fx , fy ),
1567 grad (hash (X+1, Y ), fx-1.0f, fy ),
1568 grad (hash (X , Y+1), fx , fy-1.0f),
1569 grad (hash (X+1, Y+1), fx-1.0f, fy-1.0f), u, v);
1570 result = scale2 (bilerp_result);
1571 }
1572 }
1573
1574 template <typename CGPolicyT = CGDefault, typename H>
perlin(Dual2<float> & result,const H & hash,const Dual2<float> & x,const Dual2<float> & y,const Dual2<float> & z)1575 OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (Dual2<float> &result, const H &hash,
1576 const Dual2<float> &x, const Dual2<float> &y, const Dual2<float> &z)
1577 {
1578 #if OIIO_SIMD
1579 if (CGPolicyT::allowSIMD)
1580 {
1581 int X; Dual2<float> fx = floorfrac(x, &X);
1582 int Y; Dual2<float> fy = floorfrac(y, &Y);
1583 int Z; Dual2<float> fz = floorfrac(z, &Z);
1584 Dual2<float4> fxyz (float4(fx.val(), fy.val(), fz.val()),
1585 float4(fx.dx(), fy.dx(), fz.dx()),
1586 float4(fx.dy(), fy.dy(), fz.dy()));
1587 Dual2<float4> uvw = fade (fxyz);
1588
1589 // We parallelize primarily by computing the hashes and gradients at the
1590 // integer lattice corners simultaneously. We need 8 total (for 3D), so
1591 // we do two sets of 4. (Future opportunity to do all 8 simultaneously
1592 // with AVX.)
1593 static const OIIO_SIMD4_ALIGN int i0101[4] = {0,1,0,1};
1594 static const OIIO_SIMD4_ALIGN int i0011[4] = {0,0,1,1};
1595 int4 cornerx = X + (*(int4*)i0101);
1596 int4 cornery = Y + (*(int4*)i0011);
1597 int4 cornerz = Z;
1598 int4 corner_hash_z0 = hash (cornerx, cornery, cornerz);
1599 int4 corner_hash_z1 = hash (cornerx, cornery, cornerz+int4::One());
1600
1601 static const OIIO_SIMD4_ALIGN float f0101[4] = {0,1,0,1};
1602 static const OIIO_SIMD4_ALIGN float f0011[4] = {0,0,1,1};
1603 Dual2<float4> remainderx = shuffle<0>(fxyz) - (*(float4*)f0101);
1604 Dual2<float4> remaindery = shuffle<1>(fxyz) - (*(float4*)f0011);
1605 Dual2<float4> remainderz = shuffle<2>(fxyz);
1606
1607 Dual2<float4> corner_grad_z0 = grad (corner_hash_z0, remainderx, remaindery, remainderz);
1608 Dual2<float4> corner_grad_z1 = grad (corner_hash_z1, remainderx, remaindery, remainderz-float4::One());
1609
1610 // Interpolate along the z axis first
1611 Dual2<float4> xy = OIIO::lerp (corner_grad_z0, corner_grad_z1, shuffle<2>(uvw));
1612 // Interpolate along x axis
1613 Dual2<float4> xx = OIIO::lerp (xy, shuffle<1,1,3,3>(xy), shuffle<0>(uvw));
1614 // interpolate along y axis
1615 result = scale3 (extract<0>(OIIO::lerp (xx,shuffle<2>(xx), shuffle<1>(uvw))));
1616 } else
1617 #endif
1618 {
1619 // Non-SIMD case
1620 int X; Dual2<float> fx = floorfrac(x, &X);
1621 int Y; Dual2<float> fy = floorfrac(y, &Y);
1622 int Z; Dual2<float> fz = floorfrac(z, &Z);
1623 Dual2<float> u = fade(fx);
1624 Dual2<float> v = fade(fy);
1625 Dual2<float> w = fade(fz);
1626 auto tril_result = OIIO::trilerp (grad (hash (X , Y , Z ), fx , fy , fz ),
1627 grad (hash (X+1, Y , Z ), fx-1.0f, fy , fz ),
1628 grad (hash (X , Y+1, Z ), fx , fy-1.0f, fz ),
1629 grad (hash (X+1, Y+1, Z ), fx-1.0f, fy-1.0f, fz ),
1630 grad (hash (X , Y , Z+1), fx , fy , fz-1.0f),
1631 grad (hash (X+1, Y , Z+1), fx-1.0f, fy , fz-1.0f),
1632 grad (hash (X , Y+1, Z+1), fx , fy-1.0f, fz-1.0f),
1633 grad (hash (X+1, Y+1, Z+1), fx-1.0f, fy-1.0f, fz-1.0f),
1634 u, v, w);
1635 result = scale3 (tril_result);
1636 }
1637 }
1638
1639 template <typename CGPolicyT = CGDefault, typename H>
perlin(Dual2<float> & result,const H & hash,const Dual2<float> & x,const Dual2<float> & y,const Dual2<float> & z,const Dual2<float> & w)1640 OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (Dual2<float> &result, const H &hash,
1641 const Dual2<float> &x, const Dual2<float> &y,
1642 const Dual2<float> &z, const Dual2<float> &w)
1643 {
1644 #if OIIO_SIMD
1645 if (CGPolicyT::allowSIMD)
1646 {
1647 int X; Dual2<float> fx = floorfrac(x, &X);
1648 int Y; Dual2<float> fy = floorfrac(y, &Y);
1649 int Z; Dual2<float> fz = floorfrac(z, &Z);
1650 int W; Dual2<float> fw = floorfrac(w, &W);
1651 Dual2<float4> fxyzw (float4(fx.val(), fy.val(), fz.val(), fw.val()),
1652 float4(fx.dx (), fy.dx (), fz.dx (), fw.dx ()),
1653 float4(fx.dy (), fy.dy (), fz.dy (), fw.dy ()));
1654 Dual2<float4> uvts = fade (fxyzw);
1655
1656 // We parallelize primarily by computing the hashes and gradients at the
1657 // integer lattice corners simultaneously. We need 8 total (for 3D), so
1658 // we do two sets of 4. (Future opportunity to do all 8 simultaneously
1659 // with AVX.)
1660 static const OIIO_SIMD4_ALIGN int i0101[4] = {0,1,0,1};
1661 static const OIIO_SIMD4_ALIGN int i0011[4] = {0,0,1,1};
1662 int4 cornerx = X + (*(int4*)i0101);
1663 int4 cornery = Y + (*(int4*)i0011);
1664 int4 cornerz = Z;
1665 int4 cornerz1 = Z + int4::One();
1666 int4 cornerw = W;
1667 int4 cornerw1 = W + int4::One();
1668 int4 corner_hash_z0 = hash (cornerx, cornery, cornerz, cornerw);
1669 int4 corner_hash_z1 = hash (cornerx, cornery, cornerz1, cornerw);
1670 int4 corner_hash_z2 = hash (cornerx, cornery, cornerz, cornerw1);
1671 int4 corner_hash_z3 = hash (cornerx, cornery, cornerz1, cornerw1);
1672
1673 static const OIIO_SIMD4_ALIGN float f0101[4] = {0,1,0,1};
1674 static const OIIO_SIMD4_ALIGN float f0011[4] = {0,0,1,1};
1675 Dual2<float4> remainderx = shuffle<0>(fxyzw) - (*(float4*)f0101);
1676 Dual2<float4> remaindery = shuffle<1>(fxyzw) - (*(float4*)f0011);
1677 Dual2<float4> remainderz = shuffle<2>(fxyzw);
1678 Dual2<float4> remainderz1 = remainderz - float4::One();
1679 Dual2<float4> remainderw = shuffle<3>(fxyzw);
1680 Dual2<float4> remainderw1 = remainderw - float4::One();
1681
1682 Dual2<float4> corner_grad_z0 = grad (corner_hash_z0, remainderx, remaindery, remainderz, remainderw);
1683 Dual2<float4> corner_grad_z1 = grad (corner_hash_z1, remainderx, remaindery, remainderz1, remainderw);
1684 Dual2<float4> corner_grad_z2 = grad (corner_hash_z2, remainderx, remaindery, remainderz, remainderw1);
1685 Dual2<float4> corner_grad_z3 = grad (corner_hash_z3, remainderx, remaindery, remainderz1, remainderw1);
1686
1687 // Interpolate along the w axis first
1688 Dual2<float4> xyz0 = OIIO::lerp (corner_grad_z0, corner_grad_z2, shuffle<3>(uvts));
1689 Dual2<float4> xyz1 = OIIO::lerp (corner_grad_z1, corner_grad_z3, shuffle<3>(uvts));
1690 Dual2<float4> xy = OIIO::lerp (xyz0, xyz1, shuffle<2>(uvts));
1691 // Interpolate along x axis
1692 Dual2<float4> xx = OIIO::lerp (xy, shuffle<1,1,3,3>(xy), shuffle<0>(uvts));
1693 // interpolate along y axis
1694 result = scale4 (extract<0>(OIIO::lerp (xx,shuffle<2>(xx), shuffle<1>(uvts))));
1695 } else
1696 #endif
1697 {
1698 // ORIGINAL -- non-SIMD
1699 int X; Dual2<float> fx = floorfrac(x, &X);
1700 int Y; Dual2<float> fy = floorfrac(y, &Y);
1701 int Z; Dual2<float> fz = floorfrac(z, &Z);
1702 int W; Dual2<float> fw = floorfrac(w, &W);
1703
1704 Dual2<float> u = fade(fx);
1705 Dual2<float> v = fade(fy);
1706 Dual2<float> t = fade(fz);
1707 Dual2<float> s = fade(fw);
1708
1709 auto lerp_result = OIIO::lerp (
1710 OIIO::trilerp (grad (hash (X , Y , Z , W ), fx , fy , fz , fw ),
1711 grad (hash (X+1, Y , Z , W ), fx-1.0f, fy , fz , fw ),
1712 grad (hash (X , Y+1, Z , W ), fx , fy-1.0f, fz , fw ),
1713 grad (hash (X+1, Y+1, Z , W ), fx-1.0f, fy-1.0f, fz , fw ),
1714 grad (hash (X , Y , Z+1, W ), fx , fy , fz-1.0f, fw ),
1715 grad (hash (X+1, Y , Z+1, W ), fx-1.0f, fy , fz-1.0f, fw ),
1716 grad (hash (X , Y+1, Z+1, W ), fx , fy-1.0f, fz-1.0f, fw ),
1717 grad (hash (X+1, Y+1, Z+1, W ), fx-1.0f, fy-1.0f, fz-1.0f, fw ),
1718 u, v, t),
1719 OIIO::trilerp (grad (hash (X , Y , Z , W+1), fx , fy , fz , fw-1.0f),
1720 grad (hash (X+1, Y , Z , W+1), fx-1.0f, fy , fz , fw-1.0f),
1721 grad (hash (X , Y+1, Z , W+1), fx , fy-1.0f, fz , fw-1.0f),
1722 grad (hash (X+1, Y+1, Z , W+1), fx-1.0f, fy-1.0f, fz , fw-1.0f),
1723 grad (hash (X , Y , Z+1, W+1), fx , fy , fz-1.0f, fw-1.0f),
1724 grad (hash (X+1, Y , Z+1, W+1), fx-1.0f, fy , fz-1.0f, fw-1.0f),
1725 grad (hash (X , Y+1, Z+1, W+1), fx , fy-1.0f, fz-1.0f, fw-1.0f),
1726 grad (hash (X+1, Y+1, Z+1, W+1), fx-1.0f, fy-1.0f, fz-1.0f, fw-1.0f),
1727 u, v, t),
1728 s);
1729 result = scale4 (lerp_result);
1730 }
1731 }
1732
1733
1734 template <typename CGPolicyT = CGDefault, typename H>
perlin(Vec3 & result,const H & hash,const float & x,const float & y)1735 OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (Vec3 &result, const H &hash,
1736 const float &x, const float &y)
1737 {
1738 #if OIIO_SIMD
1739 if (CGPolicyT::allowSIMD)
1740 {
1741 int4 XYZ;
1742 float4 fxyz = floorfrac (float4(x,y,0), &XYZ);
1743 float4 uv = fade (fxyz);
1744
1745 // We parallelize primarily by computing the hashes and gradients at the
1746 // integer lattice corners simultaneously. We need 8 total (for 3D), so
1747 // we do two sets of 4. (Future opportunity to do all 8 simultaneously
1748 // with AVX.)
1749 static const OIIO_SIMD4_ALIGN int i0101[4] = {0,1,0,1};
1750 static const OIIO_SIMD4_ALIGN int i0011[4] = {0,0,1,1};
1751 int4 cornerx = OIIO::simd::shuffle<0>(XYZ) + (*(int4*)i0101);
1752 int4 cornery = OIIO::simd::shuffle<1>(XYZ) + (*(int4*)i0011);
1753
1754 // We actually derive 3 hashes (one for each output dimension) for each
1755 // corner.
1756 int4 corner_hash[3];
1757 hash (corner_hash, cornerx, cornery);
1758
1759 static const OIIO_SIMD4_ALIGN float f0101[4] = {0,1,0,1};
1760 static const OIIO_SIMD4_ALIGN float f0011[4] = {0,0,1,1};
1761 float4 remainderx = OIIO::simd::shuffle<0>(fxyz) - (*(float4*)f0101);
1762 float4 remaindery = OIIO::simd::shuffle<1>(fxyz) - (*(float4*)f0011);
1763 float result_comp[3];
1764 for (int i = 0; i < 3; ++i) {
1765 float4 corner_grad = grad (corner_hash[i], remainderx, remaindery);
1766 // Do the bilinear interpolation with SIMD. Here's the fastest way
1767 // I've found to do it.
1768 float4 xx = OIIO::lerp (corner_grad, OIIO::simd::shuffle<1,1,3,3>(corner_grad), uv[0]);
1769 result_comp[i] = scale2 (OIIO::simd::extract<0>(OIIO::lerp (xx,OIIO::simd::shuffle<2>(xx), uv[1])));
1770 }
1771 result = Vec3(result_comp[0], result_comp[1], result_comp[2]);
1772 } else
1773 #endif
1774 {
1775 // Non-SIMD case
1776 typedef float T;
1777 int X; T fx = floorfrac(x, &X);
1778 int Y; T fy = floorfrac(y, &Y);
1779 T u = fade(fx);
1780 T v = fade(fy);
1781 auto bil_result = OIIO::bilerp (grad (hash (X , Y ), fx , fy ),
1782 grad (hash (X+1, Y ), fx-1.0f, fy ),
1783 grad (hash (X , Y+1), fx , fy-1.0f),
1784 grad (hash (X+1, Y+1), fx-1.0f, fy-1.0f), u, v);
1785 result = scale2 (bil_result);
1786 }
1787 }
1788
1789
1790
1791 template <typename CGPolicyT = CGDefault, typename H>
perlin(Vec3 & result,const H & hash,const float & x,const float & y,const float & z)1792 OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (Vec3 &result, const H &hash,
1793 const float &x, const float &y, const float &z)
1794 {
1795 #if OIIO_SIMD
1796 if (CGPolicyT::allowSIMD)
1797 {
1798 #if 0
1799 // You'd think it would be faster to do the floorfrac in parallel, but
1800 // according to my timings, it is not. Come back and understand why.
1801 int4 XYZ;
1802 float4 fxyz = floorfrac (float4(x,y,z), &XYZ);
1803 float4 uvw = fade (fxyz);
1804
1805 // We parallelize primarily by computing the hashes and gradients at the
1806 // integer lattice corners simultaneously. We need 8 total (for 3D), so
1807 // we do two sets of 4. (Future opportunity to do all 8 simultaneously
1808 // with AVX.)
1809 int4 cornerx = shuffle<0>(XYZ) + int4(0,1,0,1);
1810 int4 cornery = shuffle<1>(XYZ) + int4(0,0,1,1);
1811 int4 cornerz = shuffle<2>(XYZ);
1812 #else
1813 int X; float fx = floorfrac(x, &X);
1814 int Y; float fy = floorfrac(y, &Y);
1815 int Z; float fz = floorfrac(z, &Z);
1816 float4 fxyz (fx, fy, fz); // = floorfrac (xyz, &XYZ);
1817 float4 uvw = fade (fxyz);
1818
1819 // We parallelize primarily by computing the hashes and gradients at the
1820 // integer lattice corners simultaneously. We need 8 total (for 3D), so
1821 // we do two sets of 4. (Future opportunity to do all 8 simultaneously
1822 // with AVX.)
1823 static const OIIO_SIMD4_ALIGN int i0101[4] = {0,1,0,1};
1824 static const OIIO_SIMD4_ALIGN int i0011[4] = {0,0,1,1};
1825 int4 cornerx = X + (*(int4*)i0101);
1826 int4 cornery = Y + (*(int4*)i0011);
1827 int4 cornerz = Z;
1828 #endif
1829
1830 // We actually derive 3 hashes (one for each output dimension) for each
1831 // corner.
1832 int4 corner_hash_z0[3], corner_hash_z1[3];
1833 hash (corner_hash_z0, cornerx, cornery, cornerz);
1834 hash (corner_hash_z1, cornerx, cornery, cornerz+int4::One());
1835
1836 static const OIIO_SIMD4_ALIGN float f0101[4] = {0,1,0,1};
1837 static const OIIO_SIMD4_ALIGN float f0011[4] = {0,0,1,1};
1838 float4 remainderx = OIIO::simd::shuffle<0>(fxyz) - (*(float4*)f0101);
1839 float4 remaindery = OIIO::simd::shuffle<1>(fxyz) - (*(float4*)f0011);
1840 float4 remainderz0 = OIIO::simd::shuffle<2>(fxyz);
1841 float4 remainderz1 = OIIO::simd::shuffle<2>(fxyz) - float4::One();
1842 float result_comp[3];
1843 for (int i = 0; i < 3; ++i) {
1844 float4 corner_grad_z0 = grad (corner_hash_z0[i], remainderx, remaindery, remainderz0);
1845 float4 corner_grad_z1 = grad (corner_hash_z1[i], remainderx, remaindery, remainderz1);
1846
1847 // Interpolate along the z axis first
1848 float4 xy = OIIO::lerp (corner_grad_z0, corner_grad_z1, OIIO::simd::shuffle<2>(uvw));
1849 // Interpolate along x axis
1850 float4 xx = OIIO::lerp (xy, OIIO::simd::shuffle<1,1,3,3>(xy), OIIO::simd::shuffle<0>(uvw));
1851 // interpolate along y axis
1852 result_comp[i] = scale3 (OIIO::simd::extract<0>(OIIO::lerp (xx,OIIO::simd::shuffle<2>(xx), OIIO::simd::shuffle<1>(uvw))));
1853 }
1854 result = Vec3(result_comp[0],result_comp[1],result_comp[2]);
1855 } else
1856 #endif
1857 {
1858 // ORIGINAL -- non-SIMD
1859 int X; float fx = floorfrac(x, &X);
1860 int Y; float fy = floorfrac(y, &Y);
1861 int Z; float fz = floorfrac(z, &Z);
1862 float u = fade(fx);
1863 float v = fade(fy);
1864 float w = fade(fz);
1865
1866 // A.W. the OIIO_SIMD above differs from the original results
1867 // so we are re-implementing the non-SIMD version to match below
1868 #if 0
1869 result = OIIO::trilerp (grad (hash (X , Y , Z ), fx , fy , fz ),
1870 grad (hash (X+1, Y , Z ), fx-1.0f, fy , fz ),
1871 grad (hash (X , Y+1, Z ), fx , fy-1.0f, fz ),
1872 grad (hash (X+1, Y+1, Z ), fx-1.0f, fy-1.0f, fz ),
1873 grad (hash (X , Y , Z+1), fx , fy , fz-1.0f ),
1874 grad (hash (X+1, Y , Z+1), fx-1.0f, fy , fz-1.0f ),
1875 grad (hash (X , Y+1, Z+1), fx , fy-1.0f, fz-1.0f ),
1876 grad (hash (X+1, Y+1, Z+1), fx-1.0f, fy-1.0f, fz-1.0f ),
1877 u, v, w);
1878 result = scale3 (result);
1879 #else
1880 static_assert(std::is_same<Vec3i, decltype(hash(X, Y, Z))>::value, "This re-implementation was developed for Hashs returning a vector type only");
1881 // Want to avoid repeating the same hash work 3 times in a row
1882 // so rather than executing the vector version, we will use HashScalar
1883 // and directly mask its results on a per component basis
1884 auto hash_scalar = hash.convertToScalar();
1885 int h000 = hash_scalar (X , Y , Z );
1886 int h100 = hash_scalar (X+1, Y , Z );
1887 int h010 = hash_scalar (X , Y+1, Z );
1888 int h110 = hash_scalar (X+1, Y+1, Z );
1889 int h001 = hash_scalar (X , Y , Z+1);
1890 int h101 = hash_scalar (X+1, Y , Z+1);
1891 int h011 = hash_scalar (X , Y+1, Z+1);
1892 int h111 = hash_scalar (X+1, Y+1, Z+1);
1893
1894 // We are mimicking the OIIO_SSE behavior
1895 // result[0] = (h ) & 0xFF;
1896 // result[1] = (srl(h,8 )) & 0xFF;
1897 // result[2] = (srl(h,16)) & 0xFF;
1898 // skipping masking the 0th version, perhaps that is a mistake
1899
1900 float result0 = OIIO::trilerp (grad (h000, fx , fy , fz ),
1901 grad (h100, fx-1.0f, fy , fz ),
1902 grad (h010, fx , fy-1.0f, fz ),
1903 grad (h110, fx-1.0f, fy-1.0f, fz ),
1904 grad (h001, fx , fy , fz-1.0f ),
1905 grad (h101, fx-1.0f, fy , fz-1.0f ),
1906 grad (h011, fx , fy-1.0f, fz-1.0f ),
1907 grad (h111, fx-1.0f, fy-1.0f, fz-1.0f ),
1908 u, v, w);
1909
1910 float result1 = OIIO::trilerp (
1911 grad ((h000>>8) & 0xFF, fx , fy , fz ),
1912 grad ((h100>>8) & 0xFF, fx-1.0f, fy , fz ),
1913 grad ((h010>>8) & 0xFF, fx , fy-1.0f, fz ),
1914 grad ((h110>>8) & 0xFF, fx-1.0f, fy-1.0f, fz ),
1915 grad ((h001>>8) & 0xFF, fx , fy , fz-1.0f ),
1916 grad ((h101>>8) & 0xFF, fx-1.0f, fy , fz-1.0f ),
1917 grad ((h011>>8) & 0xFF, fx , fy-1.0f, fz-1.0f ),
1918 grad ((h111>>8) & 0xFF, fx-1.0f, fy-1.0f, fz-1.0f ),
1919 u, v, w);
1920
1921 float result2 = OIIO::trilerp (
1922 grad ((h000>>16) & 0xFF, fx , fy , fz ),
1923 grad ((h100>>16) & 0xFF, fx-1.0f, fy , fz ),
1924 grad ((h010>>16) & 0xFF, fx , fy-1.0f, fz ),
1925 grad ((h110>>16) & 0xFF, fx-1.0f, fy-1.0f, fz ),
1926 grad ((h001>>16) & 0xFF, fx , fy , fz-1.0f ),
1927 grad ((h101>>16) & 0xFF, fx-1.0f, fy , fz-1.0f ),
1928 grad ((h011>>16) & 0xFF, fx , fy-1.0f, fz-1.0f ),
1929 grad ((h111>>16) & 0xFF, fx-1.0f, fy-1.0f, fz-1.0f ),
1930 u, v, w);
1931
1932 result = Vec3(scale3 (result0), scale3 (result1), scale3 (result2));
1933 #endif
1934 }
1935 }
1936
1937 template <typename CGPolicyT = CGDefault, typename H>
perlin(Vec3 & result,const H & hash,const float & x,const float & y,const float & z,const float & w)1938 OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (Vec3 &result, const H &hash,
1939 const float &x, const float &y, const float &z, const float &w)
1940 {
1941 #if OIIO_SIMD
1942 if (CGPolicyT::allowSIMD)
1943 {
1944 int4 XYZW;
1945 float4 fxyzw = floorfrac (float4(x,y,z,w), &XYZW);
1946 float4 uvts = fade (fxyzw);
1947
1948 // We parallelize primarily by computing the hashes and gradients at the
1949 // integer lattice corners simultaneously. We need 8 total (for 3D), so
1950 // we do two sets of 4. (Future opportunity to do all 8 simultaneously
1951 // with AVX.)
1952 static const OIIO_SIMD4_ALIGN int i0101[4] = {0,1,0,1};
1953 static const OIIO_SIMD4_ALIGN int i0011[4] = {0,0,1,1};
1954 int4 cornerx = OIIO::simd::shuffle<0>(XYZW) + (*(int4*)i0101);
1955 int4 cornery = OIIO::simd::shuffle<1>(XYZW) + (*(int4*)i0011);
1956 int4 cornerz = OIIO::simd::shuffle<2>(XYZW);
1957 int4 cornerw = OIIO::simd::shuffle<3>(XYZW);
1958
1959 // We actually derive 3 hashes (one for each output dimension) for each
1960 // corner.
1961 int4 corner_hash_z0[3], corner_hash_z1[3];
1962 int4 corner_hash_z2[3], corner_hash_z3[3];
1963 hash (corner_hash_z0, cornerx, cornery, cornerz, cornerw);
1964 hash (corner_hash_z1, cornerx, cornery, cornerz+int4::One(), cornerw);
1965 hash (corner_hash_z2, cornerx, cornery, cornerz, cornerw+int4::One());
1966 hash (corner_hash_z3, cornerx, cornery, cornerz+int4::One(), cornerw+int4::One());
1967
1968 static const OIIO_SIMD4_ALIGN float f0101[4] = {0,1,0,1};
1969 static const OIIO_SIMD4_ALIGN float f0011[4] = {0,0,1,1};
1970 float4 remainderx = OIIO::simd::shuffle<0>(fxyzw) - (*(float4*)f0101);
1971 float4 remaindery = OIIO::simd::shuffle<1>(fxyzw) - (*(float4*)f0011);
1972 float4 remainderz = OIIO::simd::shuffle<2>(fxyzw);
1973 float4 remainderw = OIIO::simd::shuffle<3>(fxyzw);
1974 // float4 remainderz0 = shuffle<2>(fxyz);
1975 // float4 remainderz1 = shuffle<2>(fxyz) - 1.0f;
1976 float result_comp[3];
1977 for (int i = 0; i < 3; ++i) {
1978 float4 corner_grad_z0 = grad (corner_hash_z0[i], remainderx, remaindery, remainderz, remainderw);
1979 float4 corner_grad_z1 = grad (corner_hash_z1[i], remainderx, remaindery, remainderz-float4::One(), remainderw);
1980 float4 corner_grad_z2 = grad (corner_hash_z2[i], remainderx, remaindery, remainderz, remainderw-float4::One());
1981 float4 corner_grad_z3 = grad (corner_hash_z3[i], remainderx, remaindery, remainderz-float4::One(), remainderw-float4::One());
1982 result_comp[i] = scale4 (OIIO::lerp (trilerp (corner_grad_z0, corner_grad_z1, uvts),
1983 trilerp (corner_grad_z2, corner_grad_z3, uvts),
1984 OIIO::simd::extract<3>(uvts)));
1985 }
1986 result = Vec3(result_comp[0],result_comp[1],result_comp[2]);
1987 } else
1988 #endif
1989 {
1990 // ORIGINAL -- non-SIMD
1991 int X; float fx = floorfrac(x, &X);
1992 int Y; float fy = floorfrac(y, &Y);
1993 int Z; float fz = floorfrac(z, &Z);
1994 int W; float fw = floorfrac(w, &W);
1995
1996 float u = fade(fx);
1997 float v = fade(fy);
1998 float t = fade(fz);
1999 float s = fade(fw);
2000
2001 auto l_result = OIIO::lerp (
2002 OIIO::trilerp (grad (hash (X , Y , Z , W ), fx , fy , fz , fw ),
2003 grad (hash (X+1, Y , Z , W ), fx-1.0f, fy , fz , fw ),
2004 grad (hash (X , Y+1, Z , W ), fx , fy-1.0f, fz , fw ),
2005 grad (hash (X+1, Y+1, Z , W ), fx-1.0f, fy-1.0f, fz , fw ),
2006 grad (hash (X , Y , Z+1, W ), fx , fy , fz-1.0f, fw ),
2007 grad (hash (X+1, Y , Z+1, W ), fx-1.0f, fy , fz-1.0f, fw ),
2008 grad (hash (X , Y+1, Z+1, W ), fx , fy-1.0f, fz-1.0f, fw ),
2009 grad (hash (X+1, Y+1, Z+1, W ), fx-1.0f, fy-1.0f, fz-1.0f, fw ),
2010 u, v, t),
2011 OIIO::trilerp (grad (hash (X , Y , Z , W+1), fx , fy , fz , fw-1.0f),
2012 grad (hash (X+1, Y , Z , W+1), fx-1.0f, fy , fz , fw-1.0f),
2013 grad (hash (X , Y+1, Z , W+1), fx , fy-1.0f, fz , fw-1.0f),
2014 grad (hash (X+1, Y+1, Z , W+1), fx-1.0f, fy-1.0f, fz , fw-1.0f),
2015 grad (hash (X , Y , Z+1, W+1), fx , fy , fz-1.0f, fw-1.0f),
2016 grad (hash (X+1, Y , Z+1, W+1), fx-1.0f, fy , fz-1.0f, fw-1.0f),
2017 grad (hash (X , Y+1, Z+1, W+1), fx , fy-1.0f, fz-1.0f, fw-1.0f),
2018 grad (hash (X+1, Y+1, Z+1, W+1), fx-1.0f, fy-1.0f, fz-1.0f, fw-1.0f),
2019 u, v, t),
2020 s);
2021 result = scale4 (l_result);
2022 }
2023 }
2024
2025
2026 template <typename CGPolicyT = CGDefault, typename H>
perlin(Dual2<Vec3> & result,const H & hash,const Dual2<float> & x,const Dual2<float> & y)2027 OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (Dual2<Vec3> &result, const H &hash,
2028 const Dual2<float> &x, const Dual2<float> &y)
2029 {
2030 #if OIIO_SIMD
2031 if (CGPolicyT::allowSIMD)
2032 {
2033 int X; Dual2<float> fx = floorfrac(x, &X);
2034 int Y; Dual2<float> fy = floorfrac(y, &Y);
2035 Dual2<float4> fxyz (float4(fx.val(), fy.val(), 0.0f),
2036 float4(fx.dx(), fy.dx(), 0.0f),
2037 float4(fx.dy(), fy.dy(), 0.0f));
2038 Dual2<float4> uvw = fade (fxyz);
2039
2040 // We parallelize primarily by computing the hashes and gradients at the
2041 // integer lattice corners simultaneously. We need 8 total (for 3D), so
2042 // we do two sets of 4. (Future opportunity to do all 8 simultaneously
2043 // with AVX.)
2044 static const OIIO_SIMD4_ALIGN int i0101[4] = {0,1,0,1};
2045 static const OIIO_SIMD4_ALIGN int i0011[4] = {0,0,1,1};
2046 int4 cornerx = X + (*(int4*)i0101);
2047 int4 cornery = Y + (*(int4*)i0011);
2048
2049 // We actually derive 3 hashes (one for each output dimension) for each
2050 // corner.
2051 int4 corner_hash[3];
2052 hash (corner_hash, cornerx, cornery);
2053
2054 static const OIIO_SIMD4_ALIGN float f0101[4] = {0,1,0,1};
2055 static const OIIO_SIMD4_ALIGN float f0011[4] = {0,0,1,1};
2056 Dual2<float4> remainderx = shuffle<0>(fxyz) - (*(float4*)f0101);
2057 Dual2<float4> remaindery = shuffle<1>(fxyz) - (*(float4*)f0011);
2058 Dual2<float> r[3];
2059 for (int i = 0; i < 3; ++i) {
2060 Dual2<float4> corner_grad = grad (corner_hash[i], remainderx, remaindery);
2061 // Interpolate along x axis
2062 Dual2<float4> xx = OIIO::lerp (corner_grad, shuffle<1,1,3,3>(corner_grad), shuffle<0>(uvw));
2063 // interpolate along y axis
2064 r[i] = scale2 (extract<0>(OIIO::lerp (xx,shuffle<2>(xx), shuffle<1>(uvw))));
2065 }
2066 result.set (Vec3 (r[0].val(), r[1].val(), r[2].val()),
2067 Vec3 (r[0].dx(), r[1].dx(), r[2].dx()),
2068 Vec3 (r[0].dy(), r[1].dy(), r[2].dy()));
2069 } else
2070 #endif
2071 {
2072 // ORIGINAL -- non-SIMD
2073 int X; Dual2<float> fx = floorfrac(x, &X);
2074 int Y; Dual2<float> fy = floorfrac(y, &Y);
2075 Dual2<float> u = fade(fx);
2076 Dual2<float> v = fade(fy);
2077 auto bil_result = OIIO::bilerp (grad (hash (X , Y ), fx , fy ),
2078 grad (hash (X+1, Y ), fx-1.0f, fy ),
2079 grad (hash (X , Y+1), fx , fy-1.0f),
2080 grad (hash (X+1, Y+1), fx-1.0f, fy-1.0f),
2081 u, v);
2082 result = scale2 (bil_result);
2083 }
2084 }
2085
2086
2087 template <typename CGPolicyT = CGDefault, typename H>
perlin(Dual2<Vec3> & result,const H & hash,const Dual2<float> & x,const Dual2<float> & y,const Dual2<float> & z)2088 OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (Dual2<Vec3> &result, const H &hash,
2089 const Dual2<float> &x, const Dual2<float> &y, const Dual2<float> &z)
2090 {
2091 #if OIIO_SIMD
2092 if (CGPolicyT::allowSIMD)
2093 {
2094 int X; Dual2<float> fx = floorfrac(x, &X);
2095 int Y; Dual2<float> fy = floorfrac(y, &Y);
2096 int Z; Dual2<float> fz = floorfrac(z, &Z);
2097 Dual2<float4> fxyz (float4(fx.val(), fy.val(), fz.val()),
2098 float4(fx.dx(), fy.dx(), fz.dx()),
2099 float4(fx.dy(), fy.dy(), fz.dy()));
2100 Dual2<float4> uvw = fade (fxyz);
2101
2102 // We parallelize primarily by computing the hashes and gradients at the
2103 // integer lattice corners simultaneously. We need 8 total (for 3D), so
2104 // we do two sets of 4. (Future opportunity to do all 8 simultaneously
2105 // with AVX.)
2106 static const OIIO_SIMD4_ALIGN int i0101[4] = {0,1,0,1};
2107 static const OIIO_SIMD4_ALIGN int i0011[4] = {0,0,1,1};
2108 int4 cornerx = X + (*(int4*)i0101);
2109 int4 cornery = Y + (*(int4*)i0011);
2110 int4 cornerz = Z;
2111
2112 // We actually derive 3 hashes (one for each output dimension) for each
2113 // corner.
2114 int4 corner_hash_z0[3], corner_hash_z1[3];
2115 hash (corner_hash_z0, cornerx, cornery, cornerz);
2116 hash (corner_hash_z1, cornerx, cornery, cornerz+int4::One());
2117
2118 static const OIIO_SIMD4_ALIGN float f0101[4] = {0,1,0,1};
2119 static const OIIO_SIMD4_ALIGN float f0011[4] = {0,0,1,1};
2120 Dual2<float4> remainderx = shuffle<0>(fxyz) - (*(float4*)f0101);
2121 Dual2<float4> remaindery = shuffle<1>(fxyz) - (*(float4*)f0011);
2122 Dual2<float4> remainderz0 = shuffle<2>(fxyz);
2123 Dual2<float4> remainderz1 = shuffle<2>(fxyz) - float4::One();
2124 Dual2<float> r[3];
2125 for (int i = 0; i < 3; ++i) {
2126 Dual2<float4> corner_grad_z0 = grad (corner_hash_z0[i], remainderx, remaindery, remainderz0);
2127 Dual2<float4> corner_grad_z1 = grad (corner_hash_z1[i], remainderx, remaindery, remainderz1);
2128
2129 // Interpolate along the z axis first
2130 Dual2<float4> xy = OIIO::lerp (corner_grad_z0, corner_grad_z1, shuffle<2>(uvw));
2131 // Interpolate along x axis
2132 Dual2<float4> xx = OIIO::lerp (xy, shuffle<1,1,3,3>(xy), shuffle<0>(uvw));
2133 // interpolate along y axis
2134 r[i] = scale3 (extract<0>(OIIO::lerp (xx,shuffle<2>(xx), shuffle<1>(uvw))));
2135 }
2136 result.set (Vec3 (r[0].val(), r[1].val(), r[2].val()),
2137 Vec3 (r[0].dx(), r[1].dx(), r[2].dx()),
2138 Vec3 (r[0].dy(), r[1].dy(), r[2].dy()));
2139 } else
2140 #endif
2141 {
2142 // ORIGINAL -- non-SIMD
2143 int X; Dual2<float> fx = floorfrac(x, &X);
2144 int Y; Dual2<float> fy = floorfrac(y, &Y);
2145 int Z; Dual2<float> fz = floorfrac(z, &Z);
2146 Dual2<float> u = fade(fx);
2147 Dual2<float> v = fade(fy);
2148 Dual2<float> w = fade(fz);
2149 auto til_result = OIIO::trilerp (grad (hash (X , Y , Z ), fx , fy , fz ),
2150 grad (hash (X+1, Y , Z ), fx-1.0f, fy , fz ),
2151 grad (hash (X , Y+1, Z ), fx , fy-1.0f, fz ),
2152 grad (hash (X+1, Y+1, Z ), fx-1.0f, fy-1.0f, fz ),
2153 grad (hash (X , Y , Z+1), fx , fy , fz-1.0f ),
2154 grad (hash (X+1, Y , Z+1), fx-1.0f, fy , fz-1.0f ),
2155 grad (hash (X , Y+1, Z+1), fx , fy-1.0f, fz-1.0f ),
2156 grad (hash (X+1, Y+1, Z+1), fx-1.0f, fy-1.0f, fz-1.0f ),
2157 u, v, w);
2158 result = scale3 (til_result);
2159 }
2160 }
2161
2162
2163 template <typename CGPolicyT = CGDefault, typename H>
perlin(Dual2<Vec3> & result,const H & hash,const Dual2<float> & x,const Dual2<float> & y,const Dual2<float> & z,const Dual2<float> & w)2164 OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (Dual2<Vec3> &result, const H &hash,
2165 const Dual2<float> &x, const Dual2<float> &y,
2166 const Dual2<float> &z, const Dual2<float> &w)
2167 {
2168 #if OIIO_SIMD
2169 if (CGPolicyT::allowSIMD)
2170 {
2171 int X; Dual2<float> fx = floorfrac(x, &X);
2172 int Y; Dual2<float> fy = floorfrac(y, &Y);
2173 int Z; Dual2<float> fz = floorfrac(z, &Z);
2174 int W; Dual2<float> fw = floorfrac(w, &W);
2175 Dual2<float4> fxyzw (float4(fx.val(), fy.val(), fz.val(), fw.val()),
2176 float4(fx.dx (), fy.dx (), fz.dx (), fw.dx ()),
2177 float4(fx.dy (), fy.dy (), fz.dy (), fw.dy ()));
2178 Dual2<float4> uvts = fade (fxyzw);
2179
2180 // We parallelize primarily by computing the hashes and gradients at the
2181 // integer lattice corners simultaneously. We need 8 total (for 3D), so
2182 // we do two sets of 4. (Future opportunity to do all 8 simultaneously
2183 // with AVX.)
2184 static const OIIO_SIMD4_ALIGN int i0101[4] = {0,1,0,1};
2185 static const OIIO_SIMD4_ALIGN int i0011[4] = {0,0,1,1};
2186 int4 cornerx = X + (*(int4*)i0101);
2187 int4 cornery = Y + (*(int4*)i0011);
2188 int4 cornerz = Z;
2189 int4 cornerz1 = Z + int4::One();
2190 int4 cornerw = W;
2191 int4 cornerw1 = W + int4::One();
2192
2193 // We actually derive 3 hashes (one for each output dimension) for each
2194 // corner.
2195 int4 corner_hash_z0[3], corner_hash_z1[3], corner_hash_z2[3], corner_hash_z3[3];
2196 hash (corner_hash_z0, cornerx, cornery, cornerz, cornerw);
2197 hash (corner_hash_z1, cornerx, cornery, cornerz1, cornerw);
2198 hash (corner_hash_z2, cornerx, cornery, cornerz, cornerw1);
2199 hash (corner_hash_z3, cornerx, cornery, cornerz1, cornerw1);
2200
2201 static const OIIO_SIMD4_ALIGN float f0101[4] = {0,1,0,1};
2202 static const OIIO_SIMD4_ALIGN float f0011[4] = {0,0,1,1};
2203 Dual2<float4> remainderx = shuffle<0>(fxyzw) - (*(float4*)f0101);
2204 Dual2<float4> remaindery = shuffle<1>(fxyzw) - (*(float4*)f0011);
2205 Dual2<float4> remainderz = shuffle<2>(fxyzw);
2206 Dual2<float4> remainderz1 = remainderz - float4::One();
2207 Dual2<float4> remainderw = shuffle<3>(fxyzw);
2208 Dual2<float4> remainderw1 = remainderw - float4::One();
2209
2210 Dual2<float> r[3];
2211 for (int i = 0; i < 3; ++i) {
2212 Dual2<float4> corner_grad_z0 = grad (corner_hash_z0[i], remainderx, remaindery, remainderz, remainderw);
2213 Dual2<float4> corner_grad_z1 = grad (corner_hash_z1[i], remainderx, remaindery, remainderz1, remainderw);
2214 Dual2<float4> corner_grad_z2 = grad (corner_hash_z2[i], remainderx, remaindery, remainderz, remainderw1);
2215 Dual2<float4> corner_grad_z3 = grad (corner_hash_z3[i], remainderx, remaindery, remainderz1, remainderw1);
2216
2217 // Interpolate along the w axis first
2218 Dual2<float4> xyz0 = OIIO::lerp (corner_grad_z0, corner_grad_z2, shuffle<3>(uvts));
2219 Dual2<float4> xyz1 = OIIO::lerp (corner_grad_z1, corner_grad_z3, shuffle<3>(uvts));
2220 Dual2<float4> xy = OIIO::lerp (xyz0, xyz1, shuffle<2>(uvts));
2221 // Interpolate along x axis
2222 Dual2<float4> xx = OIIO::lerp (xy, shuffle<1,1,3,3>(xy), shuffle<0>(uvts));
2223 // interpolate along y axis
2224 r[i] = scale4 (extract<0>(OIIO::lerp (xx,shuffle<2>(xx), shuffle<1>(uvts))));
2225
2226 }
2227 result.set (Vec3 (r[0].val(), r[1].val(), r[2].val()),
2228 Vec3 (r[0].dx(), r[1].dx(), r[2].dx()),
2229 Vec3 (r[0].dy(), r[1].dy(), r[2].dy()));
2230 } else
2231 #endif
2232 {
2233 // ORIGINAL -- non-SIMD
2234 int X; Dual2<float> fx = floorfrac(x, &X);
2235 int Y; Dual2<float> fy = floorfrac(y, &Y);
2236 int Z; Dual2<float> fz = floorfrac(z, &Z);
2237 int W; Dual2<float> fw = floorfrac(w, &W);
2238
2239 Dual2<float> u = fade(fx);
2240 Dual2<float> v = fade(fy);
2241 Dual2<float> t = fade(fz);
2242 Dual2<float> s = fade(fw);
2243
2244 auto l_result = OIIO::lerp (
2245 OIIO::trilerp (grad (hash (X , Y , Z , W ), fx , fy , fz , fw ),
2246 grad (hash (X+1, Y , Z , W ), fx-1.0f, fy , fz , fw ),
2247 grad (hash (X , Y+1, Z , W ), fx , fy-1.0f, fz , fw ),
2248 grad (hash (X+1, Y+1, Z , W ), fx-1.0f, fy-1.0f, fz , fw ),
2249 grad (hash (X , Y , Z+1, W ), fx , fy , fz-1.0f, fw ),
2250 grad (hash (X+1, Y , Z+1, W ), fx-1.0f, fy , fz-1.0f, fw ),
2251 grad (hash (X , Y+1, Z+1, W ), fx , fy-1.0f, fz-1.0f, fw ),
2252 grad (hash (X+1, Y+1, Z+1, W ), fx-1.0f, fy-1.0f, fz-1.0f, fw ),
2253 u, v, t),
2254 OIIO::trilerp (grad (hash (X , Y , Z , W+1), fx , fy , fz , fw-1.0f),
2255 grad (hash (X+1, Y , Z , W+1), fx-1.0f, fy , fz , fw-1.0f),
2256 grad (hash (X , Y+1, Z , W+1), fx , fy-1.0f, fz , fw-1.0f),
2257 grad (hash (X+1, Y+1, Z , W+1), fx-1.0f, fy-1.0f, fz , fw-1.0f),
2258 grad (hash (X , Y , Z+1, W+1), fx , fy , fz-1.0f, fw-1.0f),
2259 grad (hash (X+1, Y , Z+1, W+1), fx-1.0f, fy , fz-1.0f, fw-1.0f),
2260 grad (hash (X , Y+1, Z+1, W+1), fx , fy-1.0f, fz-1.0f, fw-1.0f),
2261 grad (hash (X+1, Y+1, Z+1, W+1), fx-1.0f, fy-1.0f, fz-1.0f, fw-1.0f),
2262 u, v, t),
2263 s);
2264
2265 result = scale4 (l_result);
2266 }
2267 }
2268
2269
2270
2271 template<typename CGPolicyT = CGDefault>
2272 struct NoiseImpl {
NoiseImplNoiseImpl2273 OSL_FORCEINLINE OSL_HOSTDEVICE NoiseImpl () { }
2274
operatorNoiseImpl2275 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, float x) const {
2276 HashScalar h;
2277 float perlin_result;
2278 perlin<CGPolicyT>(perlin_result, h, x);
2279 result = 0.5f * (perlin_result + 1.0f);
2280 }
2281
operatorNoiseImpl2282 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, float x, float y) const {
2283 HashScalar h;
2284 float perlin_result;
2285 perlin<CGPolicyT>(perlin_result, h, x, y);
2286 result = 0.5f * (perlin_result + 1.0f);
2287 }
2288
operatorNoiseImpl2289 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p) const {
2290 HashScalar h;
2291 float perlin_result;
2292 perlin<CGPolicyT>(perlin_result, h, p.x, p.y, p.z);
2293 result = 0.5f * (perlin_result + 1.0f);
2294 }
2295
operatorNoiseImpl2296 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p, float t) const {
2297 HashScalar h;
2298 float perlin_result;
2299 perlin<CGPolicyT>(perlin_result, h, p.x, p.y, p.z, t);
2300 result = 0.5f * (perlin_result + 1.0f);
2301 }
2302
operatorNoiseImpl2303 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x) const {
2304 HashVector h;
2305 Vec3 perlin_result;
2306 perlin<CGPolicyT>(perlin_result, h, x);
2307 result = 0.5f * (perlin_result + Vec3(1.0f, 1.0f, 1.0f));
2308 }
2309
operatorNoiseImpl2310 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x, float y) const {
2311 HashVector h;
2312 Vec3 perlin_result;
2313 perlin<CGPolicyT>(perlin_result, h, x, y);
2314 result = 0.5f * (perlin_result + Vec3(1.0f, 1.0f, 1.0f));
2315 }
2316
operatorNoiseImpl2317 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p) const {
2318 HashVector h;
2319 Vec3 perlin_result;
2320 perlin<CGPolicyT>(perlin_result, h, p.x, p.y, p.z);
2321 result = 0.5f * (perlin_result + Vec3(1.0f, 1.0f, 1.0f));
2322 }
2323
operatorNoiseImpl2324 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p, float t) const {
2325 HashVector h;
2326 Vec3 perlin_result;
2327 perlin<CGPolicyT>(perlin_result, h, p.x, p.y, p.z, t);
2328 result = 0.5f * (perlin_result + Vec3(1.0f, 1.0f, 1.0f));
2329 }
2330
2331
2332 // dual versions
2333
operatorNoiseImpl2334 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<float> &x) const {
2335 HashScalar h;
2336 Dual2<float> perlin_result;
2337 perlin<CGPolicyT>(perlin_result, h, x);
2338 result = 0.5f * (perlin_result + 1.0f);
2339 }
2340
operatorNoiseImpl2341 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<float> &x, const Dual2<float> &y) const {
2342 HashScalar h;
2343 Dual2<float> perlin_result;
2344 perlin<CGPolicyT>(perlin_result, h, x, y);
2345 result = 0.5f * (perlin_result + 1.0f);
2346 }
2347
operatorNoiseImpl2348 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<Vec3> &p) const {
2349 HashScalar h;
2350 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2351 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2352 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2353 Dual2<float> perlin_result;
2354 perlin<CGPolicyT>(perlin_result, h, px, py, pz);
2355 result = 0.5f * (perlin_result + 1.0f);
2356 }
2357
operatorNoiseImpl2358 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<Vec3> &p, const Dual2<float> &t) const {
2359 HashScalar h;
2360 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2361 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2362 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2363 Dual2<float> perlin_result;
2364 perlin<CGPolicyT>(perlin_result, h, px, py, pz, t);
2365 result = 0.5f * (perlin_result + 1.0f);
2366 }
2367
operatorNoiseImpl2368 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<float> &x) const {
2369 HashVector h;
2370 Dual2<Vec3> perlin_result;
2371 perlin<CGPolicyT>(perlin_result, h, x);
2372 result = Vec3(0.5f, 0.5f, 0.5f) * (perlin_result + Vec3(1, 1, 1));
2373 }
2374
operatorNoiseImpl2375 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<float> &x, const Dual2<float> &y) const {
2376 HashVector h;
2377 Dual2<Vec3> perlin_result;
2378 perlin<CGPolicyT>(perlin_result, h, x, y);
2379 result = Vec3(0.5f, 0.5f, 0.5f) * (perlin_result + Vec3(1, 1, 1));
2380 }
2381
operatorNoiseImpl2382 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p) const {
2383 HashVector h;
2384 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2385 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2386 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2387 Dual2<Vec3> perlin_result;
2388 perlin<CGPolicyT>(perlin_result, h, px, py, pz);
2389 result = Vec3(0.5f, 0.5f, 0.5f) * (perlin_result + Vec3(1, 1, 1));
2390 }
2391
operatorNoiseImpl2392 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p, const Dual2<float> &t) const {
2393 HashVector h;
2394 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2395 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2396 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2397 Dual2<Vec3> perlin_result;
2398 perlin<CGPolicyT>(perlin_result, h, px, py, pz, t);
2399 result = Vec3(0.5f, 0.5f, 0.5f) * (perlin_result + Vec3(1, 1, 1));
2400 }
2401 };
2402
2403 struct Noise : NoiseImpl<CGDefault> {};
2404 // Scalar version of Noise that is SIMD friendly suitable to be
2405 // inlined inside of a SIMD loops
2406 struct NoiseScalar : NoiseImpl<CGScalar> {};
2407
2408
2409 template<typename CGPolicyT = CGDefault>
2410 struct SNoiseImpl {
SNoiseImplSNoiseImpl2411 OSL_FORCEINLINE OSL_HOSTDEVICE SNoiseImpl () { }
2412
operatorSNoiseImpl2413 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, float x) const {
2414 HashScalar h;
2415 perlin<CGPolicyT>(result, h, x);
2416 }
2417
operatorSNoiseImpl2418 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, float x, float y) const {
2419 HashScalar h;
2420 perlin<CGPolicyT>(result, h, x, y);
2421 }
2422
operatorSNoiseImpl2423 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p) const {
2424 HashScalar h;
2425 perlin<CGPolicyT>(result, h, p.x, p.y, p.z);
2426 }
2427
operatorSNoiseImpl2428 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p, float t) const {
2429 HashScalar h;
2430 perlin<CGPolicyT>(result, h, p.x, p.y, p.z, t);
2431 }
2432
operatorSNoiseImpl2433 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x) const {
2434 HashVector h;
2435 perlin<CGPolicyT>(result, h, x);
2436 }
2437
operatorSNoiseImpl2438 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x, float y) const {
2439 HashVector h;
2440 perlin<CGPolicyT>(result, h, x, y);
2441 }
2442
operatorSNoiseImpl2443 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p) const {
2444 HashVector h;
2445 perlin<CGPolicyT>(result, h, p.x, p.y, p.z);
2446 }
2447
operatorSNoiseImpl2448 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p, float t) const {
2449 HashVector h;
2450 perlin<CGPolicyT>(result, h, p.x, p.y, p.z, t);
2451 }
2452
2453
2454 // dual versions
2455
operatorSNoiseImpl2456 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<float> &x) const {
2457 HashScalar h;
2458 perlin<CGPolicyT>(result, h, x);
2459 }
2460
operatorSNoiseImpl2461 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<float> &x, const Dual2<float> &y) const {
2462 HashScalar h;
2463 perlin<CGPolicyT>(result, h, x, y);
2464 }
2465
operatorSNoiseImpl2466 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<Vec3> &p) const {
2467 HashScalar h;
2468 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2469 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2470 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2471 perlin<CGPolicyT>(result, h, px, py, pz);
2472 }
2473
operatorSNoiseImpl2474 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<Vec3> &p, const Dual2<float> &t) const {
2475 HashScalar h;
2476 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2477 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2478 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2479 perlin<CGPolicyT>(result, h, px, py, pz, t);
2480 }
2481
operatorSNoiseImpl2482 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<float> &x) const {
2483 HashVector h;
2484 perlin<CGPolicyT>(result, h, x);
2485 }
2486
2487
operatorSNoiseImpl2488 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<float> &x, const Dual2<float> &y) const {
2489 HashVector h;
2490 perlin<CGPolicyT>(result, h, x, y);
2491 }
2492
operatorSNoiseImpl2493 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p) const {
2494 HashVector h;
2495 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2496 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2497 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2498 perlin<CGPolicyT>(result, h, px, py, pz);
2499 }
2500
operatorSNoiseImpl2501 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p, const Dual2<float> &t) const {
2502 HashVector h;
2503 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2504 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2505 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2506 perlin<CGPolicyT>(result, h, px, py, pz, t);
2507 }
2508 };
2509
2510 struct SNoise : SNoiseImpl<CGDefault> {};
2511 // Scalar version of SNoise that is SIMD friendly suitable to be
2512 // inlined inside of a SIMD loops
2513 struct SNoiseScalar : SNoiseImpl<CGScalar> {};
2514
2515
2516
2517 template<typename CGPolicyT = CGDefault>
2518 struct PeriodicNoiseImpl {
PeriodicNoiseImplPeriodicNoiseImpl2519 OSL_FORCEINLINE OSL_HOSTDEVICE PeriodicNoiseImpl () { }
2520
operatorPeriodicNoiseImpl2521 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, float x, float px) const {
2522 HashScalarPeriodic h(px);
2523 perlin<CGPolicyT>(result, h, x);
2524 result = 0.5f * (result + 1.0f);
2525 }
2526
operatorPeriodicNoiseImpl2527 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, float x, float y, float px, float py) const {
2528 HashScalarPeriodic h(px, py);
2529 perlin<CGPolicyT>(result, h, x, y);
2530 result = 0.5f * (result + 1.0f);
2531 }
2532
operatorPeriodicNoiseImpl2533 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p, const Vec3 &pp) const {
2534 HashScalarPeriodic h(pp.x, pp.y, pp.z);
2535 perlin<CGPolicyT>(result, h, p.x, p.y, p.z);
2536 result = 0.5f * (result + 1.0f);
2537 }
2538
operatorPeriodicNoiseImpl2539 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p, float t, const Vec3 &pp, float pt) const {
2540 HashScalarPeriodic h(pp.x, pp.y, pp.z, pt);
2541 perlin<CGPolicyT>(result, h, p.x, p.y, p.z, t);
2542 result = 0.5f * (result + 1.0f);
2543 }
2544
operatorPeriodicNoiseImpl2545 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x, float px) const {
2546 HashVectorPeriodic h(px);
2547 perlin<CGPolicyT>(result, h, x);
2548 result = 0.5f * (result + Vec3(1.0f, 1.0f, 1.0f));
2549 }
2550
operatorPeriodicNoiseImpl2551 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x, float y, float px, float py) const {
2552 HashVectorPeriodic h(px, py);
2553 perlin<CGPolicyT>(result, h, x, y);
2554 result = 0.5f * (result + Vec3(1.0f, 1.0f, 1.0f));
2555 }
2556
operatorPeriodicNoiseImpl2557 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p, const Vec3 &pp) const {
2558 HashVectorPeriodic h(pp.x, pp.y, pp.z);
2559 perlin<CGPolicyT>(result, h, p.x, p.y, p.z);
2560 result = 0.5f * (result + Vec3(1.0f, 1.0f, 1.0f));
2561 }
2562
operatorPeriodicNoiseImpl2563 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p, float t, const Vec3 &pp, float pt) const {
2564 HashVectorPeriodic h(pp.x, pp.y, pp.z, pt);
2565 perlin<CGPolicyT>(result, h, p.x, p.y, p.z, t);
2566 result = 0.5f * (result + Vec3(1.0f, 1.0f, 1.0f));
2567 }
2568
2569 // dual versions
2570
operatorPeriodicNoiseImpl2571 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<float> &x, float px) const {
2572 HashScalarPeriodic h(px);
2573 perlin<CGPolicyT>(result, h, x);
2574 result = 0.5f * (result + 1.0f);
2575 }
2576
operatorPeriodicNoiseImpl2577 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<float> &x, const Dual2<float> &y,
2578 float px, float py) const {
2579 HashScalarPeriodic h(px, py);
2580 perlin<CGPolicyT>(result, h, x, y);
2581 result = 0.5f * (result + 1.0f);
2582 }
2583
operatorPeriodicNoiseImpl2584 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<Vec3> &p, const Vec3 &pp) const {
2585 HashScalarPeriodic h(pp.x, pp.y, pp.z);
2586 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2587 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2588 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2589 perlin<CGPolicyT>(result, h, px, py, pz);
2590 result = 0.5f * (result + 1.0f);
2591 }
2592
operatorPeriodicNoiseImpl2593 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<Vec3> &p, const Dual2<float> &t,
2594 const Vec3 &pp, float pt) const {
2595 HashScalarPeriodic h(pp.x, pp.y, pp.z, pt);
2596 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2597 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2598 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2599 perlin<CGPolicyT>(result, h, px, py, pz, t);
2600 result = 0.5f * (result + 1.0f);
2601 }
2602
operatorPeriodicNoiseImpl2603 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<float> &x, float px) const {
2604 HashVectorPeriodic h(px);
2605 perlin<CGPolicyT>(result, h, x);
2606 result = Vec3(0.5f, 0.5f, 0.5f) * (result + Vec3(1.0f, 1.0f, 1.0f));
2607 }
2608
operatorPeriodicNoiseImpl2609 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<float> &x, const Dual2<float> &y,
2610 float px, float py) const {
2611 HashVectorPeriodic h(px, py);
2612 perlin<CGPolicyT>(result, h, x, y);
2613 result = Vec3(0.5f, 0.5f, 0.5f) * (result + Vec3(1.0f, 1.0f, 1.0f));
2614 }
2615
operatorPeriodicNoiseImpl2616 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p, const Vec3 &pp) const {
2617 HashVectorPeriodic h(pp.x, pp.y, pp.z);
2618 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2619 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2620 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2621 perlin<CGPolicyT>(result, h, px, py, pz);
2622 result = Vec3(0.5f, 0.5f, 0.5f) * (result + Vec3(1.0f, 1.0f, 1.0f));
2623 }
2624
operatorPeriodicNoiseImpl2625 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p, const Dual2<float> &t,
2626 const Vec3 &pp, float pt) const {
2627 HashVectorPeriodic h(pp.x, pp.y, pp.z, pt);
2628 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2629 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2630 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2631 perlin<CGPolicyT>(result, h, px, py, pz, t);
2632 result = Vec3(0.5f, 0.5f, 0.5f) * (result + Vec3(1.0f, 1.0f, 1.0f));
2633 }
2634
2635 };
2636
2637 struct PeriodicNoise : PeriodicNoiseImpl<CGDefault> {};
2638 // Scalar version of PeriodicNoise that is SIMD friendly suitable to be
2639 // inlined inside of a SIMD loops
2640 struct PeriodicNoiseScalar : PeriodicNoiseImpl<CGScalar> {};
2641
2642 template<typename CGPolicyT = CGDefault>
2643 struct PeriodicSNoiseImpl {
PeriodicSNoiseImplPeriodicSNoiseImpl2644 OSL_FORCEINLINE OSL_HOSTDEVICE PeriodicSNoiseImpl () { }
2645
operatorPeriodicSNoiseImpl2646 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, float x, float px) const {
2647 HashScalarPeriodic h(px);
2648 perlin<CGPolicyT>(result, h, x);
2649 }
2650
operatorPeriodicSNoiseImpl2651 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, float x, float y, float px, float py) const {
2652 HashScalarPeriodic h(px, py);
2653 perlin<CGPolicyT>(result, h, x, y);
2654 }
2655
operatorPeriodicSNoiseImpl2656 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p, const Vec3 &pp) const {
2657 HashScalarPeriodic h(pp.x, pp.y, pp.z);
2658 perlin<CGPolicyT>(result, h, p.x, p.y, p.z);
2659 }
2660
operatorPeriodicSNoiseImpl2661 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p, float t, const Vec3 &pp, float pt) const {
2662 HashScalarPeriodic h(pp.x, pp.y, pp.z, pt);
2663 perlin<CGPolicyT>(result, h, p.x, p.y, p.z, t);
2664 }
2665
operatorPeriodicSNoiseImpl2666 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x, float px) const {
2667 HashVectorPeriodic h(px);
2668 perlin<CGPolicyT>(result, h, x);
2669 }
2670
operatorPeriodicSNoiseImpl2671 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x, float y, float px, float py) const {
2672 HashVectorPeriodic h(px, py);
2673 perlin<CGPolicyT>(result, h, x, y);
2674 }
2675
operatorPeriodicSNoiseImpl2676 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p, const Vec3 &pp) const {
2677 HashVectorPeriodic h(pp.x, pp.y, pp.z);
2678 perlin<CGPolicyT>(result, h, p.x, p.y, p.z);
2679 }
2680
operatorPeriodicSNoiseImpl2681 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p, float t, const Vec3 &pp, float pt) const {
2682 HashVectorPeriodic h(pp.x, pp.y, pp.z, pt);
2683 perlin<CGPolicyT>(result, h, p.x, p.y, p.z, t);
2684 }
2685
2686 // dual versions
2687
operatorPeriodicSNoiseImpl2688 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<float> &x, float px) const {
2689 HashScalarPeriodic h(px);
2690 perlin<CGPolicyT>(result, h, x);
2691 }
2692
operatorPeriodicSNoiseImpl2693 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<float> &x, const Dual2<float> &y,
2694 float px, float py) const {
2695 HashScalarPeriodic h(px, py);
2696 perlin<CGPolicyT>(result, h, x, y);
2697 }
2698
operatorPeriodicSNoiseImpl2699 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<Vec3> &p, const Vec3 &pp) const {
2700 HashScalarPeriodic h(pp.x, pp.y, pp.z);
2701 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2702 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2703 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2704 perlin<CGPolicyT>(result, h, px, py, pz);
2705 }
2706
operatorPeriodicSNoiseImpl2707 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<Vec3> &p, const Dual2<float> &t,
2708 const Vec3 &pp, float pt) const {
2709 HashScalarPeriodic h(pp.x, pp.y, pp.z, pt);
2710 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2711 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2712 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2713 perlin<CGPolicyT>(result, h, px, py, pz, t);
2714 }
2715
operatorPeriodicSNoiseImpl2716 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<float> &x, float px) const {
2717 HashVectorPeriodic h(px);
2718 perlin<CGPolicyT>(result, h, x);
2719 }
2720
operatorPeriodicSNoiseImpl2721 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<float> &x, const Dual2<float> &y,
2722 float px, float py) const {
2723 HashVectorPeriodic h(px, py);
2724 perlin<CGPolicyT>(result, h, x, y);
2725 }
2726
operatorPeriodicSNoiseImpl2727 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p, const Vec3 &pp) const {
2728 HashVectorPeriodic h(pp.x, pp.y, pp.z);
2729 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2730 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2731 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2732 perlin<CGPolicyT>(result, h, px, py, pz);
2733 }
2734
operatorPeriodicSNoiseImpl2735 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p, const Dual2<float> &t,
2736 const Vec3 &pp, float pt) const {
2737 HashVectorPeriodic h(pp.x, pp.y, pp.z, pt);
2738 Dual2<float> px(p.val().x, p.dx().x, p.dy().x);
2739 Dual2<float> py(p.val().y, p.dx().y, p.dy().y);
2740 Dual2<float> pz(p.val().z, p.dx().z, p.dy().z);
2741 perlin<CGPolicyT>(result, h, px, py, pz, t);
2742 }
2743
2744 };
2745
2746 struct PeriodicSNoise : PeriodicSNoiseImpl<CGDefault> {};
2747 // Scalar version of PeriodicSNoise that is SIMD friendly suitable to be
2748 // inlined inside of a SIMD loops
2749 struct PeriodicSNoiseScalar : PeriodicSNoiseImpl<CGScalar> {};
2750
2751
2752 struct SimplexNoise {
SimplexNoiseSimplexNoise2753 OSL_HOSTDEVICE SimplexNoise () { }
2754
operatorSimplexNoise2755 inline OSL_HOSTDEVICE void operator() (float &result, float x) const {
2756 result = simplexnoise1 (x);
2757 }
2758
operatorSimplexNoise2759 inline OSL_HOSTDEVICE void operator() (float &result, float x, float y) const {
2760 result = simplexnoise2 (x, y);
2761 }
2762
operatorSimplexNoise2763 inline OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p) const {
2764 result = simplexnoise3 (p.x, p.y, p.z);
2765 }
2766
operatorSimplexNoise2767 inline OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p, float t) const {
2768 result = simplexnoise4 (p.x, p.y, p.z, t);
2769 }
2770
operatorSimplexNoise2771 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x) const {
2772 result.x = simplexnoise1 (x, 0);
2773 result.y = simplexnoise1 (x, 1);
2774 result.z = simplexnoise1 (x, 2);
2775 }
2776
operatorSimplexNoise2777 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x, float y) const {
2778 result.x = simplexnoise2 (x, y, 0);
2779 result.y = simplexnoise2 (x, y, 1);
2780 result.z = simplexnoise2 (x, y, 2);
2781 }
2782
operatorSimplexNoise2783 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p) const {
2784 result.x = simplexnoise3 (p.x, p.y, p.z, 0);
2785 result.y = simplexnoise3 (p.x, p.y, p.z, 1);
2786 result.z = simplexnoise3 (p.x, p.y, p.z, 2);
2787 }
2788
operatorSimplexNoise2789 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p, float t) const {
2790 result.x = simplexnoise4 (p.x, p.y, p.z, t, 0);
2791 result.y = simplexnoise4 (p.x, p.y, p.z, t, 1);
2792 result.z = simplexnoise4 (p.x, p.y, p.z, t, 2);
2793 }
2794
2795
2796 // dual versions
2797
operatorSimplexNoise2798 inline OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<float> &x,
2799 int seed=0) const {
2800 float r, dndx;
2801 r = simplexnoise1 (x.val(), seed, &dndx);
2802 result.set (r, dndx * x.dx(), dndx * x.dy());
2803 }
2804
operatorSimplexNoise2805 inline OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<float> &x,
2806 const Dual2<float> &y, int seed=0) const {
2807 float r, dndx, dndy;
2808 r = simplexnoise2 (x.val(), y.val(), seed, &dndx, &dndy);
2809 result.set (r, dndx * x.dx() + dndy * y.dx(),
2810 dndx * x.dy() + dndy * y.dy());
2811 }
2812
operatorSimplexNoise2813 inline OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<Vec3> &p,
2814 int seed=0) const {
2815 float r, dndx, dndy, dndz;
2816 r = simplexnoise3 (p.val().x, p.val().y, p.val().z,
2817 seed, &dndx, &dndy, &dndz);
2818 result.set (r, dndx * p.dx().x + dndy * p.dx().y + dndz * p.dx().z,
2819 dndx * p.dy().x + dndy * p.dy().y + dndz * p.dy().z);
2820 }
2821
operatorSimplexNoise2822 inline OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<Vec3> &p,
2823 const Dual2<float> &t, int seed=0) const {
2824 float r, dndx, dndy, dndz, dndt;
2825 r = simplexnoise4 (p.val().x, p.val().y, p.val().z, t.val(),
2826 seed, &dndx, &dndy, &dndz, &dndt);
2827 result.set (r, dndx * p.dx().x + dndy * p.dx().y + dndz * p.dx().z + dndt * t.dx(),
2828 dndx * p.dy().x + dndy * p.dy().y + dndz * p.dy().z + dndt * t.dy());
2829 }
2830
operatorSimplexNoise2831 inline OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<float> &x) const {
2832 Dual2<float> r0, r1, r2;
2833 (*this)(r0, x, 0);
2834 (*this)(r1, x, 1);
2835 (*this)(r2, x, 2);
2836 result = make_Vec3 (r0, r1, r2);
2837 }
2838
operatorSimplexNoise2839 inline OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<float> &x, const Dual2<float> &y) const {
2840 Dual2<float> r0, r1, r2;
2841 (*this)(r0, x, y, 0);
2842 (*this)(r1, x, y, 1);
2843 (*this)(r2, x, y, 2);
2844 result = make_Vec3 (r0, r1, r2);
2845 }
2846
operatorSimplexNoise2847 inline OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p) const {
2848 Dual2<float> r0, r1, r2;
2849 (*this)(r0, p, 0);
2850 (*this)(r1, p, 1);
2851 (*this)(r2, p, 2);
2852 result = make_Vec3 (r0, r1, r2);
2853 }
2854
operatorSimplexNoise2855 inline OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p, const Dual2<float> &t) const {
2856 Dual2<float> r0, r1, r2;
2857 (*this)(r0, p, t, 0);
2858 (*this)(r1, p, t, 1);
2859 (*this)(r2, p, t, 2);
2860 result = make_Vec3 (r0, r1, r2);
2861 }
2862 };
2863
2864
2865 // Scalar version of SimplexNoise that is SIMD friendly suitable to be
2866 // inlined inside of a SIMD loops
2867 struct SimplexNoiseScalar {
SimplexNoiseScalarSimplexNoiseScalar2868 SimplexNoiseScalar () { }
2869
operatorSimplexNoiseScalar2870 OSL_FORCEINLINE void operator() (float &result, float x) const {
2871 result = sfm::simplexnoise1<0/* seed */>(x);
2872 }
2873
operatorSimplexNoiseScalar2874 OSL_FORCEINLINE void operator() (float &result, float x, float y) const {
2875 result = sfm::simplexnoise2<0/* seed */>(x, y);
2876 }
2877
operatorSimplexNoiseScalar2878 OSL_FORCEINLINE void operator()(float &result, const Vec3 &p) const {
2879 result = sfm::simplexnoise3<0/* seed */>(p.x, p.y, p.z);
2880 }
2881
operatorSimplexNoiseScalar2882 OSL_FORCEINLINE void operator()(float &result, const Vec3 &p, float t) const {
2883 result = sfm::simplexnoise4<0/* seed */>(p.x, p.y, p.z, t);
2884 }
2885
operatorSimplexNoiseScalar2886 OSL_FORCEINLINE void operator() (Vec3 &result, float x) const {
2887 result.x = sfm::simplexnoise1<0/* seed */>(x);
2888 result.y = sfm::simplexnoise1<1/* seed */>(x);
2889 result.z = sfm::simplexnoise1<2/* seed */>(x);
2890 }
2891
operatorSimplexNoiseScalar2892 OSL_FORCEINLINE void operator() (Vec3 &result, float x, float y) const {
2893 result.x = sfm::simplexnoise2<0/* seed */>(x, y);
2894 result.y = sfm::simplexnoise2<1/* seed */>(x, y);
2895 result.z = sfm::simplexnoise2<2/* seed */>(x, y);
2896 }
2897
operatorSimplexNoiseScalar2898 OSL_FORCEINLINE void operator() (Vec3 &result, const Vec3 &p) const {
2899 result.x = sfm::simplexnoise3<0/* seed */>(p.x, p.y, p.z);
2900 result.y = sfm::simplexnoise3<1/* seed */>(p.x, p.y, p.z);
2901 result.z = sfm::simplexnoise3<2/* seed */>(p.x, p.y, p.z);
2902 }
2903
operatorSimplexNoiseScalar2904 OSL_FORCEINLINE void operator() (Vec3 &result, const Vec3 &p, float t) const {
2905 result.x = sfm::simplexnoise4<0/* seed */>(p.x, p.y, p.z, t);
2906 result.y = sfm::simplexnoise4<1/* seed */>(p.x, p.y, p.z, t);
2907 result.z = sfm::simplexnoise4<2/* seed */>(p.x, p.y, p.z, t);
2908 }
2909
2910
2911 // dual versions
2912 template<int seed=0>
operatorSimplexNoiseScalar2913 OSL_FORCEINLINE void operator() (Dual2<float> &result, const Dual2<float> &x) const {
2914 float r, dndx;
2915 r = sfm::simplexnoise1<seed>(x.val(), sfm::DxRef(dndx));
2916 result.set (r, dndx * x.dx(), dndx * x.dy());
2917 }
2918
2919 template<int seedT=0>
operatorSimplexNoiseScalar2920 OSL_FORCEINLINE void operator() (Dual2<float> &result, const Dual2<float> &x, const Dual2<float> &y) const {
2921 float r, dndx, dndy;
2922 r = sfm::simplexnoise2<seedT> (x.val(), y.val(), sfm::DxDyRef(dndx, dndy));
2923 result.set (r, dndx * x.dx() + dndy * y.dx(),
2924 dndx * x.dy() + dndy * y.dy());
2925 }
2926
2927 template<int seed=0>
operatorSimplexNoiseScalar2928 OSL_FORCEINLINE void operator() (Dual2<float> &result, const Dual2<Vec3> &p) const {
2929 float dndx, dndy, dndz;
2930 const Vec3 &p_val = p.val();
2931 float r = sfm::simplexnoise3<seed>(p_val.x, p_val.y, p_val.z, sfm::DxDyDzRef(dndx, dndy, dndz));
2932 result.set (r, dndx * p.dx().x + dndy * p.dx().y + dndz * p.dx().z,
2933 dndx * p.dy().x + dndy * p.dy().y + dndz * p.dy().z);
2934 }
2935
2936 template<int seed=0>
operatorSimplexNoiseScalar2937 OSL_FORCEINLINE void operator()(Dual2<float> &result, const Dual2<Vec3> &p,
2938 const Dual2<float> &t) const {
2939 float dndx, dndy, dndz, dndt;
2940 float r = sfm::simplexnoise4<seed> (p.val().x, p.val().y, p.val().z, t.val(),
2941 sfm::DxDyDzDwRef(dndx, dndy, dndz, dndt));
2942 result.set (r, dndx * p.dx().x + dndy * p.dx().y + dndz * p.dx().z + dndt * t.dx(),
2943 dndx * p.dy().x + dndy * p.dy().y + dndz * p.dy().z + dndt * t.dy());
2944 }
2945
operatorSimplexNoiseScalar2946 OSL_FORCEINLINE void operator() (Dual2<Vec3> &result, const Dual2<float> &x) const {
2947 Dual2<float> r0, r1, r2;
2948 operator()<0>(r0, x);
2949 operator()<1>(r1, x);
2950 operator()<2>(r2, x);
2951 result = make_Vec3 (r0, r1, r2);
2952 }
2953
operatorSimplexNoiseScalar2954 OSL_FORCEINLINE void operator() (Dual2<Vec3> &result, const Dual2<float> &x, const Dual2<float> &y) const {
2955 Dual2<float> r0, r1, r2;
2956 operator()<0>(r0, x, y);
2957 operator()<1>(r1, x, y);
2958 operator()<2>(r2, x, y);
2959 result = make_Vec3 (r0, r1, r2);
2960 }
2961
operatorSimplexNoiseScalar2962 OSL_FORCEINLINE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p) const {
2963 Dual2<float> r0, r1, r2;
2964 operator()<0>(r0, p);
2965 operator()<1>(r1, p);
2966 operator()<2>(r2, p);
2967 result = make_Vec3 (r0, r1, r2);
2968 }
2969
operatorSimplexNoiseScalar2970 OSL_FORCEINLINE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p, const Dual2<float> &t) const {
2971 Dual2<float> r0, r1, r2;
2972 operator()<0>(r0, p, t);
2973 operator()<1>(r1, p, t);
2974 operator()<2>(r2, p, t);
2975 result = make_Vec3 (r0, r1, r2);
2976 }
2977 };
2978
2979 // Unsigned simplex noise
2980 struct USimplexNoise {
USimplexNoiseUSimplexNoise2981 OSL_HOSTDEVICE USimplexNoise () { }
2982
operatorUSimplexNoise2983 inline OSL_HOSTDEVICE void operator() (float &result, float x) const {
2984 result = 0.5f * (simplexnoise1 (x) + 1.0f);
2985 }
2986
operatorUSimplexNoise2987 inline OSL_HOSTDEVICE void operator() (float &result, float x, float y) const {
2988 result = 0.5f * (simplexnoise2 (x, y) + 1.0f);
2989 }
2990
operatorUSimplexNoise2991 inline OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p) const {
2992 result = 0.5f * (simplexnoise3 (p.x, p.y, p.z) + 1.0f);
2993 }
2994
operatorUSimplexNoise2995 inline OSL_HOSTDEVICE void operator() (float &result, const Vec3 &p, float t) const {
2996 result = 0.5f * (simplexnoise4 (p.x, p.y, p.z, t) + 1.0f);
2997 }
2998
operatorUSimplexNoise2999 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x) const {
3000 result.x = 0.5f * (simplexnoise1 (x, 0) + 1.0f);
3001 result.y = 0.5f * (simplexnoise1 (x, 1) + 1.0f);
3002 result.z = 0.5f * (simplexnoise1 (x, 2) + 1.0f);
3003 }
3004
operatorUSimplexNoise3005 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, float x, float y) const {
3006 result.x = 0.5f * (simplexnoise2 (x, y, 0) + 1.0f);
3007 result.y = 0.5f * (simplexnoise2 (x, y, 1) + 1.0f);
3008 result.z = 0.5f * (simplexnoise2 (x, y, 2) + 1.0f);
3009 }
3010
operatorUSimplexNoise3011 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p) const {
3012 result.x = 0.5f * (simplexnoise3 (p.x, p.y, p.z, 0) + 1.0f);
3013 result.y = 0.5f * (simplexnoise3 (p.x, p.y, p.z, 1) + 1.0f);
3014 result.z = 0.5f * (simplexnoise3 (p.x, p.y, p.z, 2) + 1.0f);
3015 }
3016
operatorUSimplexNoise3017 OSL_FORCEINLINE OSL_HOSTDEVICE void operator() (Vec3 &result, const Vec3 &p, float t) const {
3018 result.x = 0.5f * (simplexnoise4 (p.x, p.y, p.z, t, 0) + 1.0f);
3019 result.y = 0.5f * (simplexnoise4 (p.x, p.y, p.z, t, 1) + 1.0f);
3020 result.z = 0.5f * (simplexnoise4 (p.x, p.y, p.z, t, 2) + 1.0f);
3021 }
3022
3023
3024 // dual versions
3025
operatorUSimplexNoise3026 inline OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<float> &x,
3027 int seed=0) const {
3028 float r, dndx;
3029 r = simplexnoise1 (x.val(), seed, &dndx);
3030 r = 0.5f * (r + 1.0f);
3031 dndx *= 0.5f;
3032 result.set (r, dndx * x.dx(), dndx * x.dy());
3033 }
3034
operatorUSimplexNoise3035 inline OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<float> &x,
3036 const Dual2<float> &y, int seed=0) const {
3037 float r, dndx, dndy;
3038 r = simplexnoise2 (x.val(), y.val(), seed, &dndx, &dndy);
3039 r = 0.5f * (r + 1.0f);
3040 dndx *= 0.5f;
3041 dndy *= 0.5f;
3042 result.set (r, dndx * x.dx() + dndy * y.dx(),
3043 dndx * x.dy() + dndy * y.dy());
3044 }
3045
operatorUSimplexNoise3046 inline OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<Vec3> &p,
3047 int seed=0) const {
3048 float r, dndx, dndy, dndz;
3049 r = simplexnoise3 (p.val().x, p.val().y, p.val().z,
3050 seed, &dndx, &dndy, &dndz);
3051 r = 0.5f * (r + 1.0f);
3052 dndx *= 0.5f;
3053 dndy *= 0.5f;
3054 dndz *= 0.5f;
3055 result.set (r, dndx * p.dx().x + dndy * p.dx().y + dndz * p.dx().z,
3056 dndx * p.dy().x + dndy * p.dy().y + dndz * p.dy().z);
3057 }
3058
operatorUSimplexNoise3059 inline OSL_HOSTDEVICE void operator() (Dual2<float> &result, const Dual2<Vec3> &p,
3060 const Dual2<float> &t, int seed=0) const {
3061 float r, dndx, dndy, dndz, dndt;
3062 r = simplexnoise4 (p.val().x, p.val().y, p.val().z, t.val(),
3063 seed, &dndx, &dndy, &dndz, &dndt);
3064 r = 0.5f * (r + 1.0f);
3065 dndx *= 0.5f;
3066 dndy *= 0.5f;
3067 dndz *= 0.5f;
3068 dndt *= 0.5f;
3069 result.set (r, dndx * p.dx().x + dndy * p.dx().y + dndz * p.dx().z + dndt * t.dx(),
3070 dndx * p.dy().x + dndy * p.dy().y + dndz * p.dy().z + dndt * t.dy());
3071 }
3072
operatorUSimplexNoise3073 inline OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<float> &x) const {
3074 Dual2<float> r0, r1, r2;
3075 (*this)(r0, x, 0);
3076 (*this)(r1, x, 1);
3077 (*this)(r2, x, 2);
3078 result = make_Vec3 (r0, r1, r2);
3079 }
3080
operatorUSimplexNoise3081 inline OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<float> &x, const Dual2<float> &y) const {
3082 Dual2<float> r0, r1, r2;
3083 (*this)(r0, x, y, 0);
3084 (*this)(r1, x, y, 1);
3085 (*this)(r2, x, y, 2);
3086 result = make_Vec3 (r0, r1, r2);
3087 }
3088
operatorUSimplexNoise3089 inline OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p) const {
3090 Dual2<float> r0, r1, r2;
3091 (*this)(r0, p, 0);
3092 (*this)(r1, p, 1);
3093 (*this)(r2, p, 2);
3094 result = make_Vec3 (r0, r1, r2);
3095 }
3096
operatorUSimplexNoise3097 inline OSL_HOSTDEVICE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p, const Dual2<float> &t) const {
3098 Dual2<float> r0, r1, r2;
3099 (*this)(r0, p, t, 0);
3100 (*this)(r1, p, t, 1);
3101 (*this)(r2, p, t, 2);
3102 result = make_Vec3 (r0, r1, r2);
3103 }
3104
3105 };
3106
3107 // Scalar version of USimplexNoise that is SIMD friendly suitable to be
3108 // inlined inside of a SIMD loops
3109 struct USimplexNoiseScalar {
USimplexNoiseScalarUSimplexNoiseScalar3110 OSL_FORCEINLINE USimplexNoiseScalar () { }
3111
operatorUSimplexNoiseScalar3112 OSL_FORCEINLINE void operator() (float &result, float x) const {
3113 result = 0.5f * (sfm::simplexnoise1<0/* seed */>(x) + 1.0f);
3114 }
3115
operatorUSimplexNoiseScalar3116 OSL_FORCEINLINE void operator() (float &result, float x, float y) const {
3117 result = 0.5f * (sfm::simplexnoise2<0/* seed */>(x, y) + 1.0f);
3118 }
3119
operatorUSimplexNoiseScalar3120 OSL_FORCEINLINE void operator() (float &result, const Vec3 &p) const {
3121 result = 0.5f * (sfm::simplexnoise3<0/* seed */>(p.x, p.y, p.z) + 1.0f);
3122 }
3123
operatorUSimplexNoiseScalar3124 OSL_FORCEINLINE void operator() (float &result, const Vec3 &p, float t) const {
3125 result = 0.5f * (sfm::simplexnoise4<0/* seed */> (p.x, p.y, p.z, t) + 1.0f);
3126 }
3127
operatorUSimplexNoiseScalar3128 OSL_FORCEINLINE void operator() (Vec3 &result, float x) const {
3129 result.x = 0.5f * (sfm::simplexnoise1<0/* seed */>(x) + 1.0f);
3130 result.y = 0.5f * (sfm::simplexnoise1<1/* seed */>(x) + 1.0f);
3131 result.z = 0.5f * (sfm::simplexnoise1<2/* seed */>(x) + 1.0f);
3132 }
3133
operatorUSimplexNoiseScalar3134 OSL_FORCEINLINE void operator() (Vec3 &result, float x, float y) const {
3135 result.x = 0.5f * (sfm::simplexnoise2<0>(x, y) + 1.0f);
3136 result.y = 0.5f * (sfm::simplexnoise2<1>(x, y) + 1.0f);
3137 result.z = 0.5f * (sfm::simplexnoise2<2>(x, y) + 1.0f);
3138 }
3139
operatorUSimplexNoiseScalar3140 OSL_FORCEINLINE void operator() (Vec3 &result, const Vec3 &p) const {
3141 result.x = 0.5f * (sfm::simplexnoise3<0/* seed */>(p.x, p.y, p.z) + 1.0f);
3142 result.y = 0.5f * (sfm::simplexnoise3<1/* seed */>(p.x, p.y, p.z) + 1.0f);
3143 result.z = 0.5f * (sfm::simplexnoise3<2/* seed */>(p.x, p.y, p.z) + 1.0f);
3144 }
3145
operatorUSimplexNoiseScalar3146 OSL_FORCEINLINE void operator() (Vec3 &result, const Vec3 &p, float t) const {
3147 result.x = 0.5f * (sfm::simplexnoise4<0/* seed */>(p.x, p.y, p.z, t) + 1.0f);
3148 result.y = 0.5f * (sfm::simplexnoise4<1/* seed */>(p.x, p.y, p.z, t) + 1.0f);
3149 result.z = 0.5f * (sfm::simplexnoise4<2/* seed */>(p.x, p.y, p.z, t) + 1.0f);
3150 }
3151
3152 // dual versions
3153 template<int seed=0>
operatorUSimplexNoiseScalar3154 OSL_FORCEINLINE void operator() (Dual2<float> &result, const Dual2<float> &x) const {
3155 float r, dndx;
3156 r = sfm::simplexnoise1<seed>(x.val(), sfm::DxRef(dndx));
3157 r = 0.5f * (r + 1.0f);
3158 dndx *= 0.5f;
3159 result.set (r, dndx * x.dx(), dndx * x.dy());
3160 }
3161
3162 template<int seedT=0>
operatorUSimplexNoiseScalar3163 OSL_FORCEINLINE void operator() (Dual2<float> &result, const Dual2<float> &x,
3164 const Dual2<float> &y) const {
3165 float r, dndx, dndy;
3166 r = sfm::simplexnoise2<seedT> (x.val(), y.val(), sfm::DxDyRef(dndx, dndy));
3167 r = 0.5f * (r + 1.0f);
3168 dndx *= 0.5f;
3169 dndy *= 0.5f;
3170 result.set (r, dndx * x.dx() + dndy * y.dx(),
3171 dndx * x.dy() + dndy * y.dy());
3172 }
3173
3174 template<int seed=0>
operatorUSimplexNoiseScalar3175 OSL_FORCEINLINE void operator() (Dual2<float> &result, const Dual2<Vec3> &p) const {
3176 float r, dndx, dndy, dndz;
3177 const Vec3 &p_val = p.val();
3178 r = sfm::simplexnoise3<seed>(p_val.x, p_val.y, p_val.z, sfm::DxDyDzRef(dndx, dndy, dndz));
3179 r = 0.5f * (r + 1.0f);
3180 dndx *= 0.5f;
3181 dndy *= 0.5f;
3182 dndz *= 0.5f;
3183 result.set (r, dndx * p.dx().x + dndy * p.dx().y + dndz * p.dx().z,
3184 dndx * p.dy().x + dndy * p.dy().y + dndz * p.dy().z);
3185 }
3186
3187 template<int seed=0>
operatorUSimplexNoiseScalar3188 OSL_FORCEINLINE void operator()(Dual2<float> &result, const Dual2<Vec3> &p,
3189 const Dual2<float> &t) const {
3190 float dndx, dndy, dndz, dndt;
3191 float r = sfm::simplexnoise4<seed> (p.val().x, p.val().y, p.val().z, t.val(),
3192 sfm::DxDyDzDwRef(dndx, dndy, dndz, dndt));
3193 r = 0.5f * (r + 1.0f);
3194 dndx *= 0.5f;
3195 dndy *= 0.5f;
3196 dndz *= 0.5f;
3197 dndt *= 0.5f;
3198 result.set (r, dndx * p.dx().x + dndy * p.dx().y + dndz * p.dx().z + dndt * t.dx(),
3199 dndx * p.dy().x + dndy * p.dy().y + dndz * p.dy().z + dndt * t.dy());
3200 }
3201
operatorUSimplexNoiseScalar3202 OSL_FORCEINLINE void operator() (Dual2<Vec3> &result, const Dual2<float> &x) const {
3203 Dual2<float> r0, r1, r2;
3204 operator()<0>(r0, x);
3205 operator()<1>(r1, x);
3206 operator()<2>(r2, x);
3207 result = make_Vec3 (r0, r1, r2);
3208 }
3209
operatorUSimplexNoiseScalar3210 OSL_FORCEINLINE void operator() (Dual2<Vec3> &result, const Dual2<float> &x, const Dual2<float> &y) const {
3211 Dual2<float> r0, r1, r2;
3212 operator()<0>(r0, x, y);
3213 operator()<1>(r1, x, y);
3214 operator()<2>(r2, x, y);
3215 result = make_Vec3 (r0, r1, r2);
3216 }
3217
operatorUSimplexNoiseScalar3218 OSL_FORCEINLINE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p) const {
3219 Dual2<float> r0, r1, r2;
3220 operator()<0>(r0, p);
3221 operator()<1>(r1, p);
3222 operator()<2>(r2, p);
3223 result = make_Vec3 (r0, r1, r2);
3224 }
3225
operatorUSimplexNoiseScalar3226 OSL_FORCEINLINE void operator() (Dual2<Vec3> &result, const Dual2<Vec3> &p, const Dual2<float> &t) const {
3227 Dual2<float> r0, r1, r2;
3228 operator()<0>(r0, p, t);
3229 operator()<1>(r1, p, t);
3230 operator()<2>(r2, p, t);
3231 result = make_Vec3 (r0, r1, r2);
3232 }
3233
3234 };
3235 } // anonymous namespace
3236
3237
3238
3239 OSLNOISEPUBLIC OSL_HOSTDEVICE
3240 Dual2<float> gabor (const Dual2<Vec3> &P, const NoiseParams *opt);
3241
3242
3243
3244 OSLNOISEPUBLIC OSL_HOSTDEVICE
3245 Dual2<float> gabor (const Dual2<float> &x, const Dual2<float> &y,
3246 const NoiseParams *opt);
3247
3248 OSLNOISEPUBLIC OSL_HOSTDEVICE
3249 Dual2<float> gabor (const Dual2<float> &x, const NoiseParams *opt);
3250
3251 OSLNOISEPUBLIC OSL_HOSTDEVICE
3252 Dual2<Vec3> gabor3 (const Dual2<Vec3> &P, const NoiseParams *opt);
3253
3254 OSLNOISEPUBLIC OSL_HOSTDEVICE
3255 Dual2<Vec3> gabor3 (const Dual2<float> &x, const Dual2<float> &y,
3256 const NoiseParams *opt);
3257
3258 OSLNOISEPUBLIC OSL_HOSTDEVICE
3259 Dual2<Vec3> gabor3 (const Dual2<float> &x, const NoiseParams *opt);
3260
3261 OSLNOISEPUBLIC OSL_HOSTDEVICE
3262 Dual2<float> pgabor (const Dual2<Vec3> &P, const Vec3 &Pperiod,
3263 const NoiseParams *opt);
3264
3265 OSLNOISEPUBLIC OSL_HOSTDEVICE
3266 Dual2<float> pgabor (const Dual2<float> &x, const Dual2<float> &y,
3267 float xperiod, float yperiod, const NoiseParams *opt);
3268
3269 OSLNOISEPUBLIC OSL_HOSTDEVICE
3270 Dual2<float> pgabor (const Dual2<float> &x, float xperiod,
3271 const NoiseParams *opt);
3272
3273 OSLNOISEPUBLIC OSL_HOSTDEVICE
3274 Dual2<Vec3> pgabor3 (const Dual2<Vec3> &P, const Vec3 &Pperiod,
3275 const NoiseParams *opt);
3276
3277 OSLNOISEPUBLIC OSL_HOSTDEVICE
3278 Dual2<Vec3> pgabor3 (const Dual2<float> &x, const Dual2<float> &y,
3279 float xperiod, float yperiod, const NoiseParams *opt);
3280
3281 OSLNOISEPUBLIC OSL_HOSTDEVICE
3282 Dual2<Vec3> pgabor3 (const Dual2<float> &x, float xperiod,
3283 const NoiseParams *opt);
3284
3285
3286
3287 }; // namespace pvt
3288
3289 namespace oslnoise {
3290
3291 #define DECLNOISE(name,impl) \
3292 template <class S> OSL_HOSTDEVICE \
3293 inline float name (S x) { \
3294 pvt::impl noise; \
3295 float r; \
3296 noise (r, x); \
3297 return r; \
3298 } \
3299 \
3300 template <class S, class T> OSL_HOSTDEVICE \
3301 inline float name (S x, T y) { \
3302 pvt::impl noise; \
3303 float r; \
3304 noise (r, x, y); \
3305 return r; \
3306 } \
3307 \
3308 template <class S> OSL_HOSTDEVICE \
3309 inline Vec3 v ## name (S x) { \
3310 pvt::impl noise; \
3311 Vec3 r; \
3312 noise (r, x); \
3313 return r; \
3314 } \
3315 \
3316 template <class S, class T> OSL_HOSTDEVICE \
3317 inline Vec3 v ## name (S x, T y) { \
3318 pvt::impl noise; \
3319 Vec3 r; \
3320 noise (r, x, y); \
3321 return r; \
3322 }
3323
3324
3325 DECLNOISE (snoise, SNoise)
3326 DECLNOISE (noise, Noise)
3327 DECLNOISE (cellnoise, CellNoise)
3328 DECLNOISE (hashnoise, HashNoise)
3329
3330 #undef DECLNOISE
3331 } // namespace oslnoise
3332
3333
3334 OSL_NAMESPACE_EXIT
3335