1 #ifndef PtexUtils_h
2 #define PtexUtils_h
3 
4 /*
5 PTEX SOFTWARE
6 Copyright 2014 Disney Enterprises, Inc.  All rights reserved
7 
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11 
12   * Redistributions of source code must retain the above copyright
13     notice, this list of conditions and the following disclaimer.
14 
15   * Redistributions in binary form must reproduce the above copyright
16     notice, this list of conditions and the following disclaimer in
17     the documentation and/or other materials provided with the
18     distribution.
19 
20   * The names "Disney", "Walt Disney Pictures", "Walt Disney Animation
21     Studios" or the names of its contributors may NOT be used to
22     endorse or promote products derived from this software without
23     specific prior written permission from Walt Disney Pictures.
24 
25 Disclaimer: THIS SOFTWARE IS PROVIDED BY WALT DISNEY PICTURES AND
26 CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
27 BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS
28 FOR A PARTICULAR PURPOSE, NONINFRINGEMENT AND TITLE ARE DISCLAIMED.
29 IN NO EVENT SHALL WALT DISNEY PICTURES, THE COPYRIGHT HOLDER OR
30 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND BASED ON ANY
34 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
37 */
38 
#include <cmath>
#include <cstring>
#include "Ptexture.h"
#include "PtexHalf.h"

#ifdef __SSE4_1__
#include <smmintrin.h>
#endif

#include "PtexVersion.h"
48 
49 PTEX_NAMESPACE_BEGIN
50 namespace PtexUtils {
51 
// Returns true when at most one bit of x is set.
// Note that this makes 0 report true as well as 1, 2, 4, ...; a value with
// only the sign bit set (INT_MIN) also reports true.
// The test is carried out on the unsigned bit pattern so that x-1 can never
// overflow a signed int (the signed form is undefined for INT_MIN).
inline bool isPowerOfTwo(int x)
{
    const unsigned int bits = static_cast<unsigned int>(x);
    return (bits & (bits - 1u)) == 0u;
}
56 
// Population count: returns how many bits of x are set (0..32).
// Works by summing adjacent bit fields of doubling width in parallel.
inline uint32_t ones(uint32_t x)
{
    const uint32_t everyOtherBit  = 0x55555555;
    const uint32_t everyOtherPair = 0x33333333;
    const uint32_t lowNybbles     = 0x0f0f0f0f;

    uint32_t sum = x;
    sum = ((sum >> 1) & everyOtherBit)  + (sum & everyOtherBit);   // 2-bit sums
    sum = ((sum >> 2) & everyOtherPair) + (sum & everyOtherPair);  // 4-bit sums
    sum = ((sum >> 4) & lowNybbles)     + (sum & lowNybbles);      // per-byte sums
    sum += sum >> 8;   // fold bytes together
    sum += sum >> 16;  // fold 16-bit halves together
    // the total (at most 32) now sits in the low byte; upper bytes are junk
    return sum & 0xff;
}
67 
floor_log2(uint32_t x)68 inline uint32_t floor_log2(uint32_t x)
69 {
70     // floor(log2(n))
71     x |= (x >> 1);
72     x |= (x >> 2);
73     x |= (x >> 4);
74     x |= (x >> 8);
75     x |= (x >> 16);
76     return ones(x>>1);
77 }
78 
ceil_log2(uint32_t x)79 inline uint32_t ceil_log2(uint32_t x)
80 {
81     // ceil(log2(n))
82     bool isPow2 = isPowerOfTwo(x);
83     x |= (x >> 1);
84     x |= (x >> 2);
85     x |= (x >> 4);
86     x |= (x >> 8);
87     x |= (x >> 16);
88     return ones(x>>1) + !isPow2;
89 }
90 
// Returns 1.0/pow(2,power) by writing the biased exponent (127 - power)
// straight into the exponent field of an IEEE-754 single-precision value.
// The result is an exact reciprocal only while that field stays in the
// normal range, i.e. for power in [-127, 126].
inline float reciprocalPow2(int power)
{
    // Shift as unsigned so a negative (127 - power) is not left-shifted as a
    // signed value, and copy the bits with memcpy rather than reading an
    // inactive union member.
    const uint32_t bits = static_cast<uint32_t>(127 - power) << 23;
    float result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}
101 
// Returns 127 minus the biased exponent field of w's float32 representation.
// For normal positive w this equals ceil(log2(1.0/w)), obtained without any
// floating-point math (so no rounding error).  The sign bit is ignored, and
// zero/denormal inputs (exponent field 0) yield 127.
inline int calcResFromWidth(float w)
{
    // Copy the bits out with memcpy rather than reading an inactive union
    // member.
    uint32_t bits;
    std::memcpy(&bits, &w, sizeof(bits));
    const int biasedExponent = static_cast<int>((bits >> 23) & 0xffu);
    return 127 - biasedExponent;
}
114 
// Cubic smoothstep: 0 for x below a, 1 for x at or above b, and the
// polynomial 3t^2 - 2t^3 in between, where t = (x-a)/(b-a).
inline float smoothstep(float x, float a, float b)
{
    if (x < a) {
        return 0.0f;
    }
    if (x >= b) {
        return 1.0f;
    }
    // here a <= x < b, so (b - a) is non-zero
    const float t = (x - a) / (b - a);
    return t * t * (3.0f - 2.0f * t);
}
122 
// Quintic smoothstep (the cubic version is only C1): 0 for x below a,
// 1 for x at or above b, and 6t^5 - 15t^4 + 10t^3 in between, where
// t = (x-a)/(b-a).
inline float qsmoothstep(float x, float a, float b)
{
    if (x < a) {
        return 0.0f;
    }
    if (x >= b) {
        return 1.0f;
    }
    // here a <= x < b, so (b - a) is non-zero
    const float t = (x - a) / (b - a);
    const float cube = t * t * t;
    return cube * (10.0f + t * (-15.0f + t * 6.0f));
}
131 
// Generic absolute value: x when x is positive, otherwise its negation.
template<typename T>
inline T abs(T x)
{
    if (x > 0) {
        return x;
    }
    return -x;
}
134 
// Absolute value for float: clears the sign bit, so -0.0f maps to +0.0f.
// Uses std::fabs in place of the previous hand-rolled sign-bit mask, which
// read an inactive union member.
inline float abs(float x)
{
    return std::fabs(x);
}
145 
// Returns a when a compares less than b, otherwise b (so b on ties).
template<typename T>
inline T min(T a, T b)
{
    if (a < b) {
        return a;
    }
    return b;
}
148 
// Returns a when a compares greater than b, otherwise b (so b on ties).
template<typename T>
inline T max(T a, T b)
{
    if (a > b) {
        return a;
    }
    return b;
}
151 
// Restricts x to [lo, hi]: the lower bound is applied first, then the upper.
template<typename T>
inline T clamp(T x, T lo, T hi)
{
    const T atLeastLo = max(x, lo);
    return min(atLeastLo, hi);
}
154 
// Generic halve: shifts val right by one bit and converts back to T.
template<typename T>
inline T halve(T val)
{
    return static_cast<T>(val >> 1);
}
157 
// Halve for float values.
inline float halve(float val)
{
    return val * 0.5f;
}
halve(PtexHalf val)159 inline PtexHalf halve(PtexHalf val) { return 0.5f * val; }
160 
// Generic quarter: shifts val right by two bits and converts back to T.
template<typename T>
inline T quarter(T val)
{
    return static_cast<T>(val >> 2);
}
163 
// Quarter for float values.
inline float quarter(float val)
{
    return val * 0.25f;
}
quarter(PtexHalf val)165 inline PtexHalf quarter(PtexHalf val) { return 0.25f * val; }
166 
167 bool isConstant(const void* data, int stride, int ures, int vres, int pixelSize);
168 void interleave(const void* src, int sstride, int ures, int vres,
169                 void* dst, int dstride, DataType dt, int nchannels);
170 void deinterleave(const void* src, int sstride, int ures, int vres,
171                   void* dst, int dstride, DataType dt, int nchannels);
172 void encodeDifference(void* data, int size, DataType dt);
173 void decodeDifference(void* data, int size, DataType dt);
174 typedef void ReduceFn(const void* src, int sstride, int ures, int vres,
175                       void* dst, int dstride, DataType dt, int nchannels);
176 void reduce(const void* src, int sstride, int ures, int vres,
177             void* dst, int dstride, DataType dt, int nchannels);
178 void reduceu(const void* src, int sstride, int ures, int vres,
179              void* dst, int dstride, DataType dt, int nchannels);
180 void reducev(const void* src, int sstride, int ures, int vres,
181              void* dst, int dstride, DataType dt, int nchannels);
182 void reduceTri(const void* src, int sstride, int ures, int vres,
183                void* dst, int dstride, DataType dt, int nchannels);
184 void average(const void* src, int sstride, int ures, int vres,
185              void* dst, DataType dt, int nchannels);
186 void fill(const void* src, void* dst, int dstride,
187           int ures, int vres, int pixelsize);
188 void copy(const void* src, int sstride, void* dst, int dstride,
189           int nrows, int rowlen);
190 void blend(const void* src, float weight, void* dst, bool flip,
191            int rowlen, DataType dt, int nchannels);
192 void multalpha(void* data, int npixels, DataType dt, int nchannels, int alphachan);
193 void divalpha(void* data, int npixels, DataType dt, int nchannels, int alphachan);
194 
195 void genRfaceids(const FaceInfo* faces, int nfaces,
196                  uint32_t* rfaceids, uint32_t* faceids);
197 
198 // fixed length vector accumulator: dst[i] += val[i] * weight
template<typename T, int n>
struct VecAccum {
    VecAccum() {}

    // Accumulates the leading element, then hands the remaining n-1 elements
    // to the next-smaller instantiation, so the loop is unrolled at compile
    // time.
    void operator()(float* dst, const T* val, float weight)
    {
        dst[0] += static_cast<float>(val[0]) * weight;
        VecAccum<T, n-1> rest;
        rest(dst + 1, val + 1, weight);
    }
};

// Zero-length case: nothing left to accumulate; terminates the recursion.
template<typename T>
struct VecAccum<T,0> {
    void operator()(float*, const T*, float) {}
};
211 
212 // variable length vector accumulator: dst[i] += val[i] * weight
template<typename T>
struct VecAccumN {
    // Run-time channel count; a non-positive nchan leaves dst untouched.
    void operator()(float* dst, const T* val, int nchan, float weight)
    {
        int chan = 0;
        while (chan < nchan) {
            dst[chan] += static_cast<float>(val[chan]) * weight;
            ++chan;
        }
    }
};
220 
// fixed length vector multiplier: dst[i] = val[i] * weight
template<typename T, int n>
struct VecMult {
    VecMult() {}

    // Overwrites the leading element with val*weight, then hands the
    // remaining n-1 elements to the next-smaller instantiation, so the loop
    // is unrolled at compile time.
    void operator()(float* dst, const T* val, float weight)
    {
        dst[0] = static_cast<float>(val[0]) * weight;
        VecMult<T, n-1> rest;
        rest(dst + 1, val + 1, weight);
    }
};

// Zero-length case: nothing left to write; terminates the recursion.
template<typename T>
struct VecMult<T,0> {
    void operator()(float*, const T*, float) {}
};
234 
235 // variable length vector multiplier: dst[i] = val[i] * weight
template<typename T>
struct VecMultN {
    // Run-time channel count; a non-positive nchan leaves dst untouched.
    void operator()(float* dst, const T* val, int nchan, float weight)
    {
        int chan = 0;
        while (chan < nchan) {
            dst[chan] = static_cast<float>(val[chan]) * weight;
            ++chan;
        }
    }
};
243 
244 typedef void (*ApplyConstFn)(float weight, float* dst, void* data, int nChan);
245 extern ApplyConstFn applyConstFunctions[20];
246 inline void applyConst(float weight, float* dst, void* data, Ptex::DataType dt, int nChan)
247 {
248     // dispatch specialized apply function
249     ApplyConstFn fn = applyConstFunctions[((unsigned)nChan<=4)*nChan*4 + dt];
250     fn(weight, dst, data, nChan);
251 }
252 
#ifdef __SSE4_1__
// SSE4.1 builds: round with the packed-round instruction, exceptions
// suppressed, and read back the lowest lane.

// Rounds f toward negative infinity.
inline float floor(float f)
{
    const __m128 lanes = _mm_set1_ps(f);
    const __m128 rounded = _mm_round_ps(lanes, (_MM_FROUND_NO_EXC | _MM_FROUND_TO_NEG_INF));
    float lowest;
    _mm_store_ss(&lowest, rounded);
    return lowest;
}

// Rounds f toward positive infinity.
inline float ceil(float f)
{
    const __m128 lanes = _mm_set1_ps(f);
    const __m128 rounded = _mm_round_ps(lanes, (_MM_FROUND_NO_EXC | _MM_FROUND_TO_POS_INF));
    float lowest;
    _mm_store_ss(&lowest, rounded);
    return lowest;
}
#else
// Other builds: use the standard library versions.
using std::floor;
using std::ceil;
#endif
268 
} // end namespace PtexUtils
270 
271 PTEX_NAMESPACE_END
272 
273 #endif
274