1 #ifndef PtexUtils_h
2 #define PtexUtils_h
3
4 /*
5 PTEX SOFTWARE
6 Copyright 2014 Disney Enterprises, Inc. All rights reserved
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14
15 * Redistributions in binary form must reproduce the above copyright
16 notice, this list of conditions and the following disclaimer in
17 the documentation and/or other materials provided with the
18 distribution.
19
20 * The names "Disney", "Walt Disney Pictures", "Walt Disney Animation
21 Studios" or the names of its contributors may NOT be used to
22 endorse or promote products derived from this software without
23 specific prior written permission from Walt Disney Pictures.
24
25 Disclaimer: THIS SOFTWARE IS PROVIDED BY WALT DISNEY PICTURES AND
26 CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
27 BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS
28 FOR A PARTICULAR PURPOSE, NONINFRINGEMENT AND TITLE ARE DISCLAIMED.
29 IN NO EVENT SHALL WALT DISNEY PICTURES, THE COPYRIGHT HOLDER OR
30 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND BASED ON ANY
34 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
37 */
38
#include <cmath>
#include <cstring>
#include "Ptexture.h"
#include "PtexHalf.h"

#ifdef __SSE4_1__
#include <smmintrin.h>
#endif

#include "PtexVersion.h"
48
49 PTEX_NAMESPACE_BEGIN
50 namespace PtexUtils {
51
// Returns true when x has at most one bit set.  Note that x == 0 is
// therefore also reported as a power of two.
inline bool isPowerOfTwo(int x)
{
    // x & (x-1) clears the lowest set bit; nothing may remain afterwards
    const int withoutLowestBit = x & (x - 1);
    return withoutLowestBit == 0;
}
56
// Population count: returns the number of bits set in x (0..32).
inline uint32_t ones(uint32_t x)
{
    // Sum the bits in parallel, doubling the field width at each step.
    const uint32_t pairs   = (x & 0x55555555) + ((x >> 1) & 0x55555555);
    const uint32_t nybbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
    const uint32_t bytes   = (nybbles & 0x0f0f0f0f) + ((nybbles >> 4) & 0x0f0f0f0f);
    const uint32_t words   = bytes + (bytes >> 8);
    const uint32_t total   = words + (words >> 16);
    // only the low byte holds the final count; the rest is partial-sum residue
    return total & 0xff;
}
67
floor_log2(uint32_t x)68 inline uint32_t floor_log2(uint32_t x)
69 {
70 // floor(log2(n))
71 x |= (x >> 1);
72 x |= (x >> 2);
73 x |= (x >> 4);
74 x |= (x >> 8);
75 x |= (x >> 16);
76 return ones(x>>1);
77 }
78
ceil_log2(uint32_t x)79 inline uint32_t ceil_log2(uint32_t x)
80 {
81 // ceil(log2(n))
82 bool isPow2 = isPowerOfTwo(x);
83 x |= (x >> 1);
84 x |= (x >> 2);
85 x |= (x >> 4);
86 x |= (x >> 8);
87 x |= (x >> 16);
88 return ones(x>>1) + !isPow2;
89 }
90
// Returns 1.0/pow(2,power) by writing the exponent field of a 32-bit
// IEEE-754 float directly.  No range check is performed: the result is
// only a valid power of two while the biased exponent (127-power) stays
// within 1..254, i.e. for power in [-127, 126].
inline float reciprocalPow2(int power)
{
    // Build the bit pattern in unsigned arithmetic (left-shifting a
    // negative int is not portable) and copy it into the float with memcpy
    // instead of reading an inactive union member, which ISO C++ leaves
    // undefined.
    const uint32_t bits = static_cast<uint32_t>(127 - power) << 23;
    float result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}
101
// Returns 127 minus the biased exponent of w, read straight from its
// 32-bit IEEE-754 representation.  For normal positive w this equals
// ceil(log2(1.0/w)) without calling log2 and without rounding error.
// The sign bit is ignored.
inline int calcResFromWidth(float w)
{
    // Copy the bits out with memcpy rather than reading an inactive union
    // member (undefined in ISO C++), and shift as unsigned so the sign bit
    // cannot leak into the exponent field.
    uint32_t bits;
    std::memcpy(&bits, &w, sizeof(bits));
    const int biasedExponent = static_cast<int>((bits >> 23) & 0xff);
    return 127 - biasedExponent;
}
114
// Cubic smoothstep: 0 for x < a, 1 for x >= b, and t*t*(3 - 2*t) in
// between, where t = (x-a)/(b-a).
inline float smoothstep(float x, float a, float b)
{
    if (x < a) {
        return 0.0f;
    }
    if (x >= b) {
        return 1.0f;
    }
    // normalized position of x inside [a, b)
    const float t = (x - a) / (b - a);
    return t * t * (3.0f - 2.0f * t);
}
122
// Quintic smoothstep (the cubic version is only C1): 0 for x < a, 1 for
// x >= b, and t^3 * (10 - 15*t + 6*t^2) in between, where t = (x-a)/(b-a).
inline float qsmoothstep(float x, float a, float b)
{
    if (x < a) {
        return 0.0f;
    }
    if (x >= b) {
        return 1.0f;
    }
    // normalized position of x inside [a, b)
    const float t = (x - a) / (b - a);
    return t * t * t * (10.0f + t * (-15.0f + t * 6.0f));
}
131
// Generic absolute value: returns x when x > 0, otherwise -x.
template<typename T>
inline T abs(T x)
{
    if (x > 0) {
        return x;
    }
    return -x;
}
134
// Absolute value of a float, computed by clearing the sign bit of its
// 32-bit IEEE-754 representation (so -0.0f becomes +0.0f as well).
inline float abs(float x)
{
    // Move the bits through memcpy rather than reading an inactive union
    // member, which ISO C++ leaves undefined.
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    bits &= 0x7fffffffu;  // drop the sign bit
    std::memcpy(&x, &bits, sizeof(x));
    return x;
}
145
// Returns a when a < b, otherwise b.
template<typename T>
inline T min(T a, T b)
{
    if (a < b) {
        return a;
    }
    return b;
}
148
// Returns a when a > b, otherwise b.
template<typename T>
inline T max(T a, T b)
{
    if (a > b) {
        return a;
    }
    return b;
}
151
// Restricts x to [lo, hi]: first raises x to at least lo, then lowers the
// result to at most hi.
template<typename T>
inline T clamp(T x, T lo, T hi)
{
    const T raised = max(x, lo);
    return min(raised, hi);
}
154
// Generic halving via a right shift by one bit; the shifted value is
// converted back to T.
template<typename T>
inline T halve(T val)
{
    return static_cast<T>(val >> 1);
}
157
// Float overload: halves by multiplication, since floats cannot be shifted.
inline float halve(float val)
{
    return val * 0.5f;
}
halve(PtexHalf val)159 inline PtexHalf halve(PtexHalf val) { return 0.5f * val; }
160
// Generic quartering via a right shift by two bits; the shifted value is
// converted back to T.
template<typename T>
inline T quarter(T val)
{
    return static_cast<T>(val >> 2);
}
163
// Float overload: quarters by multiplication, since floats cannot be shifted.
inline float quarter(float val)
{
    return val * 0.25f;
}
quarter(PtexHalf val)165 inline PtexHalf quarter(PtexHalf val) { return 0.25f * val; }
166
167 bool isConstant(const void* data, int stride, int ures, int vres, int pixelSize);
168 void interleave(const void* src, int sstride, int ures, int vres,
169 void* dst, int dstride, DataType dt, int nchannels);
170 void deinterleave(const void* src, int sstride, int ures, int vres,
171 void* dst, int dstride, DataType dt, int nchannels);
172 void encodeDifference(void* data, int size, DataType dt);
173 void decodeDifference(void* data, int size, DataType dt);
174 typedef void ReduceFn(const void* src, int sstride, int ures, int vres,
175 void* dst, int dstride, DataType dt, int nchannels);
176 void reduce(const void* src, int sstride, int ures, int vres,
177 void* dst, int dstride, DataType dt, int nchannels);
178 void reduceu(const void* src, int sstride, int ures, int vres,
179 void* dst, int dstride, DataType dt, int nchannels);
180 void reducev(const void* src, int sstride, int ures, int vres,
181 void* dst, int dstride, DataType dt, int nchannels);
182 void reduceTri(const void* src, int sstride, int ures, int vres,
183 void* dst, int dstride, DataType dt, int nchannels);
184 void average(const void* src, int sstride, int ures, int vres,
185 void* dst, DataType dt, int nchannels);
186 void fill(const void* src, void* dst, int dstride,
187 int ures, int vres, int pixelsize);
188 void copy(const void* src, int sstride, void* dst, int dstride,
189 int nrows, int rowlen);
190 void blend(const void* src, float weight, void* dst, bool flip,
191 int rowlen, DataType dt, int nchannels);
192 void multalpha(void* data, int npixels, DataType dt, int nchannels, int alphachan);
193 void divalpha(void* data, int npixels, DataType dt, int nchannels, int alphachan);
194
195 void genRfaceids(const FaceInfo* faces, int nfaces,
196 uint32_t* rfaceids, uint32_t* faceids);
197
// Fixed-length vector accumulator: dst[i] += val[i] * weight for i in [0,n).
// The length is a template parameter, so the recursion below is expanded at
// compile time rather than executed as a run-time loop.
template<typename T, int n>
struct VecAccum {
    VecAccum() {}
    void operator()(float* dst, const T* val, float weight)
    {
        // accumulate the leading element here...
        dst[0] += static_cast<float>(val[0]) * weight;
        // ...and pass the remaining n-1 elements to the next instantiation
        VecAccum<T, n-1> rest;
        rest(dst + 1, val + 1, weight);
    }
};
// Recursion terminator: no elements left, nothing to do.
template<typename T>
struct VecAccum<T,0> {
    void operator()(float*, const T*, float) {}
};
211
// Variable-length vector accumulator: dst[i] += val[i] * weight for
// i in [0,nchan).  A non-positive nchan leaves dst untouched.
template<typename T>
struct VecAccumN {
    void operator()(float* dst, const T* val, int nchan, float weight)
    {
        for (int chan = 0; chan < nchan; ++chan) {
            const float contribution = static_cast<float>(val[chan]) * weight;
            dst[chan] += contribution;
        }
    }
};
220
// Fixed-length vector multiplier: dst[i] = val[i] * weight for i in [0,n).
// Unlike VecAccum this overwrites dst instead of adding to it.  The length
// is a template parameter, so the recursion below is expanded at compile
// time rather than executed as a run-time loop.
template<typename T, int n>
struct VecMult {
    VecMult() {}
    void operator()(float* dst, const T* val, float weight)
    {
        // write the leading element here...
        dst[0] = static_cast<float>(val[0]) * weight;
        // ...and pass the remaining n-1 elements to the next instantiation
        VecMult<T, n-1> rest;
        rest(dst + 1, val + 1, weight);
    }
};
// Recursion terminator: no elements left, nothing to do.
template<typename T>
struct VecMult<T,0> {
    void operator()(float*, const T*, float) {}
};
234
// Variable-length vector multiplier: dst[i] = val[i] * weight for
// i in [0,nchan).  A non-positive nchan leaves dst untouched.
template<typename T>
struct VecMultN {
    void operator()(float* dst, const T* val, int nchan, float weight)
    {
        for (int chan = 0; chan < nchan; ++chan) {
            const float scaled = static_cast<float>(val[chan]) * weight;
            dst[chan] = scaled;
        }
    }
};
243
244 typedef void (*ApplyConstFn)(float weight, float* dst, void* data, int nChan);
245 extern ApplyConstFn applyConstFunctions[20];
246 inline void applyConst(float weight, float* dst, void* data, Ptex::DataType dt, int nChan)
247 {
248 // dispatch specialized apply function
249 ApplyConstFn fn = applyConstFunctions[((unsigned)nChan<=4)*nChan*4 + dt];
250 fn(weight, dst, data, nChan);
251 }
252
// floor()/ceil() for floats.  With SSE4.1 available they are implemented
// with the packed rounding intrinsic (exceptions suppressed); otherwise the
// <cmath> versions are pulled into this scope.
#ifdef __SSE4_1__
inline float floor(float f)
{
    // round every lane toward -infinity, then read back the lowest lane
    const __m128 rounded = _mm_round_ps(_mm_set1_ps(f), (_MM_FROUND_NO_EXC | _MM_FROUND_TO_NEG_INF));
    float lowLane;
    _mm_store_ss(&lowLane, rounded);
    return lowLane;
}
inline float ceil(float f)
{
    // round every lane toward +infinity, then read back the lowest lane
    const __m128 rounded = _mm_round_ps(_mm_set1_ps(f), (_MM_FROUND_NO_EXC | _MM_FROUND_TO_POS_INF));
    float lowLane;
    _mm_store_ss(&lowLane, rounded);
    return lowLane;
}
#else
using std::floor;
using std::ceil;
#endif
268
} // end namespace PtexUtils
270
271 PTEX_NAMESPACE_END
272
273 #endif
274