1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef __UTIL_TRANSFORM_H__
18 #define __UTIL_TRANSFORM_H__
19 
20 #ifndef __KERNEL_GPU__
21 #  include <string.h>
22 #endif
23 
24 #include "util/util_math.h"
25 #include "util/util_types.h"
26 
27 CCL_NAMESPACE_BEGIN
28 
29 /* Affine transformation, stored as 4x3 matrix. */
30 
31 typedef struct Transform {
32   float4 x, y, z;
33 
34 #ifndef __KERNEL_GPU__
35   float4 operator[](int i) const
36   {
37     return *(&x + i);
38   }
39   float4 &operator[](int i)
40   {
41     return *(&x + i);
42   }
43 #endif
44 } Transform;
45 
46 /* Transform decomposed in rotation/translation/scale. we use the same data
47  * structure as Transform, and tightly pack decomposition into it. first the
48  * rotation (4), then translation (3), then 3x3 scale matrix (9). */
49 
50 typedef struct DecomposedTransform {
51   float4 x, y, z, w;
52 } DecomposedTransform;
53 
54 /* Functions */
55 
transform_point(const Transform * t,const float3 a)56 ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
57 {
58   /* TODO(sergey): Disabled for now, causes crashes in certain cases. */
59 #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
60   ssef x, y, z, w, aa;
61   aa = a.m128;
62 
63   x = _mm_loadu_ps(&t->x.x);
64   y = _mm_loadu_ps(&t->y.x);
65   z = _mm_loadu_ps(&t->z.x);
66   w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
67 
68   _MM_TRANSPOSE4_PS(x, y, z, w);
69 
70   ssef tmp = shuffle<0>(aa) * x;
71   tmp = madd(shuffle<1>(aa), y, tmp);
72   tmp = madd(shuffle<2>(aa), z, tmp);
73   tmp += w;
74 
75   return float3(tmp.m128);
76 #else
77   float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z + t->x.w,
78                          a.x * t->y.x + a.y * t->y.y + a.z * t->y.z + t->y.w,
79                          a.x * t->z.x + a.y * t->z.y + a.z * t->z.z + t->z.w);
80 
81   return c;
82 #endif
83 }
84 
transform_direction(const Transform * t,const float3 a)85 ccl_device_inline float3 transform_direction(const Transform *t, const float3 a)
86 {
87 #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
88   ssef x, y, z, w, aa;
89   aa = a.m128;
90   x = _mm_loadu_ps(&t->x.x);
91   y = _mm_loadu_ps(&t->y.x);
92   z = _mm_loadu_ps(&t->z.x);
93   w = _mm_setzero_ps();
94 
95   _MM_TRANSPOSE4_PS(x, y, z, w);
96 
97   ssef tmp = shuffle<0>(aa) * x;
98   tmp = madd(shuffle<1>(aa), y, tmp);
99   tmp = madd(shuffle<2>(aa), z, tmp);
100 
101   return float3(tmp.m128);
102 #else
103   float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z,
104                          a.x * t->y.x + a.y * t->y.y + a.z * t->y.z,
105                          a.x * t->z.x + a.y * t->z.y + a.z * t->z.z);
106 
107   return c;
108 #endif
109 }
110 
transform_direction_transposed(const Transform * t,const float3 a)111 ccl_device_inline float3 transform_direction_transposed(const Transform *t, const float3 a)
112 {
113   float3 x = make_float3(t->x.x, t->y.x, t->z.x);
114   float3 y = make_float3(t->x.y, t->y.y, t->z.y);
115   float3 z = make_float3(t->x.z, t->y.z, t->z.z);
116 
117   return make_float3(dot(x, a), dot(y, a), dot(z, a));
118 }
119 
make_transform(float a,float b,float c,float d,float e,float f,float g,float h,float i,float j,float k,float l)120 ccl_device_inline Transform make_transform(float a,
121                                            float b,
122                                            float c,
123                                            float d,
124                                            float e,
125                                            float f,
126                                            float g,
127                                            float h,
128                                            float i,
129                                            float j,
130                                            float k,
131                                            float l)
132 {
133   Transform t;
134 
135   t.x.x = a;
136   t.x.y = b;
137   t.x.z = c;
138   t.x.w = d;
139   t.y.x = e;
140   t.y.y = f;
141   t.y.z = g;
142   t.y.w = h;
143   t.z.x = i;
144   t.z.y = j;
145   t.z.z = k;
146   t.z.w = l;
147 
148   return t;
149 }
150 
euler_to_transform(const float3 euler)151 ccl_device_inline Transform euler_to_transform(const float3 euler)
152 {
153   float cx = cosf(euler.x);
154   float cy = cosf(euler.y);
155   float cz = cosf(euler.z);
156   float sx = sinf(euler.x);
157   float sy = sinf(euler.y);
158   float sz = sinf(euler.z);
159 
160   Transform t;
161   t.x.x = cy * cz;
162   t.y.x = cy * sz;
163   t.z.x = -sy;
164 
165   t.x.y = sy * sx * cz - cx * sz;
166   t.y.y = sy * sx * sz + cx * cz;
167   t.z.y = cy * sx;
168 
169   t.x.z = sy * cx * cz + sx * sz;
170   t.y.z = sy * cx * sz - sx * cz;
171   t.z.z = cy * cx;
172 
173   t.x.w = t.y.w = t.z.w = 0.0f;
174   return t;
175 }
176 
177 /* Constructs a coordinate frame from a normalized normal. */
make_transform_frame(float3 N)178 ccl_device_inline Transform make_transform_frame(float3 N)
179 {
180   const float3 dx0 = cross(make_float3(1.0f, 0.0f, 0.0f), N);
181   const float3 dx1 = cross(make_float3(0.0f, 1.0f, 0.0f), N);
182   const float3 dx = normalize((dot(dx0, dx0) > dot(dx1, dx1)) ? dx0 : dx1);
183   const float3 dy = normalize(cross(N, dx));
184   return make_transform(dx.x, dx.y, dx.z, 0.0f, dy.x, dy.y, dy.z, 0.0f, N.x, N.y, N.z, 0.0f);
185 }
186 
187 #ifndef __KERNEL_GPU__
188 
189 ccl_device_inline Transform operator*(const Transform a, const Transform b)
190 {
191   float4 c_x = make_float4(b.x.x, b.y.x, b.z.x, 0.0f);
192   float4 c_y = make_float4(b.x.y, b.y.y, b.z.y, 0.0f);
193   float4 c_z = make_float4(b.x.z, b.y.z, b.z.z, 0.0f);
194   float4 c_w = make_float4(b.x.w, b.y.w, b.z.w, 1.0f);
195 
196   Transform t;
197   t.x = make_float4(dot(a.x, c_x), dot(a.x, c_y), dot(a.x, c_z), dot(a.x, c_w));
198   t.y = make_float4(dot(a.y, c_x), dot(a.y, c_y), dot(a.y, c_z), dot(a.y, c_w));
199   t.z = make_float4(dot(a.z, c_x), dot(a.z, c_y), dot(a.z, c_z), dot(a.z, c_w));
200 
201   return t;
202 }
203 
print_transform(const char * label,const Transform & t)204 ccl_device_inline void print_transform(const char *label, const Transform &t)
205 {
206   print_float4(label, t.x);
207   print_float4(label, t.y);
208   print_float4(label, t.z);
209   printf("\n");
210 }
211 
transform_translate(float3 t)212 ccl_device_inline Transform transform_translate(float3 t)
213 {
214   return make_transform(1, 0, 0, t.x, 0, 1, 0, t.y, 0, 0, 1, t.z);
215 }
216 
transform_translate(float x,float y,float z)217 ccl_device_inline Transform transform_translate(float x, float y, float z)
218 {
219   return transform_translate(make_float3(x, y, z));
220 }
221 
transform_scale(float3 s)222 ccl_device_inline Transform transform_scale(float3 s)
223 {
224   return make_transform(s.x, 0, 0, 0, 0, s.y, 0, 0, 0, 0, s.z, 0);
225 }
226 
transform_scale(float x,float y,float z)227 ccl_device_inline Transform transform_scale(float x, float y, float z)
228 {
229   return transform_scale(make_float3(x, y, z));
230 }
231 
transform_rotate(float angle,float3 axis)232 ccl_device_inline Transform transform_rotate(float angle, float3 axis)
233 {
234   float s = sinf(angle);
235   float c = cosf(angle);
236   float t = 1.0f - c;
237 
238   axis = normalize(axis);
239 
240   return make_transform(axis.x * axis.x * t + c,
241                         axis.x * axis.y * t - s * axis.z,
242                         axis.x * axis.z * t + s * axis.y,
243                         0.0f,
244 
245                         axis.y * axis.x * t + s * axis.z,
246                         axis.y * axis.y * t + c,
247                         axis.y * axis.z * t - s * axis.x,
248                         0.0f,
249 
250                         axis.z * axis.x * t - s * axis.y,
251                         axis.z * axis.y * t + s * axis.x,
252                         axis.z * axis.z * t + c,
253                         0.0f);
254 }
255 
256 /* Euler is assumed to be in XYZ order. */
transform_euler(float3 euler)257 ccl_device_inline Transform transform_euler(float3 euler)
258 {
259   return transform_rotate(euler.z, make_float3(0.0f, 0.0f, 1.0f)) *
260          transform_rotate(euler.y, make_float3(0.0f, 1.0f, 0.0f)) *
261          transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f));
262 }
263 
transform_identity()264 ccl_device_inline Transform transform_identity()
265 {
266   return transform_scale(1.0f, 1.0f, 1.0f);
267 }
268 
269 ccl_device_inline bool operator==(const Transform &A, const Transform &B)
270 {
271   return memcmp(&A, &B, sizeof(Transform)) == 0;
272 }
273 
274 ccl_device_inline bool operator!=(const Transform &A, const Transform &B)
275 {
276   return !(A == B);
277 }
278 
transform_get_column(const Transform * t,int column)279 ccl_device_inline float3 transform_get_column(const Transform *t, int column)
280 {
281   return make_float3(t->x[column], t->y[column], t->z[column]);
282 }
283 
transform_set_column(Transform * t,int column,float3 value)284 ccl_device_inline void transform_set_column(Transform *t, int column, float3 value)
285 {
286   t->x[column] = value.x;
287   t->y[column] = value.y;
288   t->z[column] = value.z;
289 }
290 
291 Transform transform_inverse(const Transform &a);
292 Transform transform_transposed_inverse(const Transform &a);
293 
transform_uniform_scale(const Transform & tfm,float & scale)294 ccl_device_inline bool transform_uniform_scale(const Transform &tfm, float &scale)
295 {
296   /* the epsilon here is quite arbitrary, but this function is only used for
297    * surface area and bump, where we expect it to not be so sensitive */
298   float eps = 1e-6f;
299 
300   float sx = len_squared(float4_to_float3(tfm.x));
301   float sy = len_squared(float4_to_float3(tfm.y));
302   float sz = len_squared(float4_to_float3(tfm.z));
303   float stx = len_squared(transform_get_column(&tfm, 0));
304   float sty = len_squared(transform_get_column(&tfm, 1));
305   float stz = len_squared(transform_get_column(&tfm, 2));
306 
307   if (fabsf(sx - sy) < eps && fabsf(sx - sz) < eps && fabsf(sx - stx) < eps &&
308       fabsf(sx - sty) < eps && fabsf(sx - stz) < eps) {
309     scale = sx;
310     return true;
311   }
312 
313   return false;
314 }
315 
transform_negative_scale(const Transform & tfm)316 ccl_device_inline bool transform_negative_scale(const Transform &tfm)
317 {
318   float3 c0 = transform_get_column(&tfm, 0);
319   float3 c1 = transform_get_column(&tfm, 1);
320   float3 c2 = transform_get_column(&tfm, 2);
321 
322   return (dot(cross(c0, c1), c2) < 0.0f);
323 }
324 
transform_clear_scale(const Transform & tfm)325 ccl_device_inline Transform transform_clear_scale(const Transform &tfm)
326 {
327   Transform ntfm = tfm;
328 
329   transform_set_column(&ntfm, 0, normalize(transform_get_column(&ntfm, 0)));
330   transform_set_column(&ntfm, 1, normalize(transform_get_column(&ntfm, 1)));
331   transform_set_column(&ntfm, 2, normalize(transform_get_column(&ntfm, 2)));
332 
333   return ntfm;
334 }
335 
transform_empty()336 ccl_device_inline Transform transform_empty()
337 {
338   return make_transform(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
339 }
340 
341 #endif
342 
343 /* Motion Transform */
344 
quat_interpolate(float4 q1,float4 q2,float t)345 ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t)
346 {
347   /* Optix is using lerp to interpolate motion transformations. */
348 #ifdef __KERNEL_OPTIX__
349   return normalize((1.0f - t) * q1 + t * q2);
350 #else  /* __KERNEL_OPTIX__ */
351   /* note: this does not ensure rotation around shortest angle, q1 and q2
352    * are assumed to be matched already in transform_motion_decompose */
353   float costheta = dot(q1, q2);
354 
355   /* possible optimization: it might be possible to precompute theta/qperp */
356 
357   if (costheta > 0.9995f) {
358     /* linear interpolation in degenerate case */
359     return normalize((1.0f - t) * q1 + t * q2);
360   }
361   else {
362     /* slerp */
363     float theta = acosf(clamp(costheta, -1.0f, 1.0f));
364     float4 qperp = normalize(q2 - q1 * costheta);
365     float thetap = theta * t;
366     return q1 * cosf(thetap) + qperp * sinf(thetap);
367   }
368 #endif /* __KERNEL_OPTIX__ */
369 }
370 
transform_quick_inverse(Transform M)371 ccl_device_inline Transform transform_quick_inverse(Transform M)
372 {
373   /* possible optimization: can we avoid doing this altogether and construct
374    * the inverse matrix directly from negated translation, transposed rotation,
375    * scale can be inverted but what about shearing? */
376   Transform R;
377   float det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) +
378               M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z);
379   if (det == 0.0f) {
380     M.x.x += 1e-8f;
381     M.y.y += 1e-8f;
382     M.z.z += 1e-8f;
383     det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) +
384           M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z);
385   }
386   det = (det != 0.0f) ? 1.0f / det : 0.0f;
387 
388   float3 Rx = det * make_float3(M.z.z * M.y.y - M.z.y * M.y.z,
389                                 M.z.y * M.x.z - M.z.z * M.x.y,
390                                 M.y.z * M.x.y - M.y.y * M.x.z);
391   float3 Ry = det * make_float3(M.z.x * M.y.z - M.z.z * M.y.x,
392                                 M.z.z * M.x.x - M.z.x * M.x.z,
393                                 M.y.x * M.x.z - M.y.z * M.x.x);
394   float3 Rz = det * make_float3(M.z.y * M.y.x - M.z.x * M.y.y,
395                                 M.z.x * M.x.y - M.z.y * M.x.x,
396                                 M.y.y * M.x.x - M.y.x * M.x.y);
397   float3 T = -make_float3(M.x.w, M.y.w, M.z.w);
398 
399   R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T));
400   R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T));
401   R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T));
402 
403   return R;
404 }
405 
transform_compose(Transform * tfm,const DecomposedTransform * decomp)406 ccl_device_inline void transform_compose(Transform *tfm, const DecomposedTransform *decomp)
407 {
408   /* rotation */
409   float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc;
410 
411   q0 = M_SQRT2_F * decomp->x.w;
412   q1 = M_SQRT2_F * decomp->x.x;
413   q2 = M_SQRT2_F * decomp->x.y;
414   q3 = M_SQRT2_F * decomp->x.z;
415 
416   qda = q0 * q1;
417   qdb = q0 * q2;
418   qdc = q0 * q3;
419   qaa = q1 * q1;
420   qab = q1 * q2;
421   qac = q1 * q3;
422   qbb = q2 * q2;
423   qbc = q2 * q3;
424   qcc = q3 * q3;
425 
426   float3 rotation_x = make_float3(1.0f - qbb - qcc, -qdc + qab, qdb + qac);
427   float3 rotation_y = make_float3(qdc + qab, 1.0f - qaa - qcc, -qda + qbc);
428   float3 rotation_z = make_float3(-qdb + qac, qda + qbc, 1.0f - qaa - qbb);
429 
430   /* scale */
431   float3 scale_x = make_float3(decomp->y.w, decomp->z.z, decomp->w.y);
432   float3 scale_y = make_float3(decomp->z.x, decomp->z.w, decomp->w.z);
433   float3 scale_z = make_float3(decomp->z.y, decomp->w.x, decomp->w.w);
434 
435   /* compose with translation */
436   tfm->x = make_float4(
437       dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x);
438   tfm->y = make_float4(
439       dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y);
440   tfm->z = make_float4(
441       dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z);
442 }
443 
444 /* Interpolate from array of decomposed transforms. */
transform_motion_array_interpolate(Transform * tfm,const ccl_global DecomposedTransform * motion,uint numsteps,float time)445 ccl_device void transform_motion_array_interpolate(Transform *tfm,
446                                                    const ccl_global DecomposedTransform *motion,
447                                                    uint numsteps,
448                                                    float time)
449 {
450   /* Figure out which steps we need to interpolate. */
451   int maxstep = numsteps - 1;
452   int step = min((int)(time * maxstep), maxstep - 1);
453   float t = time * maxstep - step;
454 
455   const ccl_global DecomposedTransform *a = motion + step;
456   const ccl_global DecomposedTransform *b = motion + step + 1;
457 
458   /* Interpolate rotation, translation and scale. */
459   DecomposedTransform decomp;
460   decomp.x = quat_interpolate(a->x, b->x, t);
461   decomp.y = (1.0f - t) * a->y + t * b->y;
462   decomp.z = (1.0f - t) * a->z + t * b->z;
463   decomp.w = (1.0f - t) * a->w + t * b->w;
464 
465   /* Compose rotation, translation, scale into matrix. */
466   transform_compose(tfm, &decomp);
467 }
468 
transform_isfinite_safe(Transform * tfm)469 ccl_device_inline bool transform_isfinite_safe(Transform *tfm)
470 {
471   return isfinite4_safe(tfm->x) && isfinite4_safe(tfm->y) && isfinite4_safe(tfm->z);
472 }
473 
transform_decomposed_isfinite_safe(DecomposedTransform * decomp)474 ccl_device_inline bool transform_decomposed_isfinite_safe(DecomposedTransform *decomp)
475 {
476   return isfinite4_safe(decomp->x) && isfinite4_safe(decomp->y) && isfinite4_safe(decomp->z) &&
477          isfinite4_safe(decomp->w);
478 }
479 
480 #ifndef __KERNEL_GPU__
481 
482 class BoundBox2D;
483 
484 ccl_device_inline bool operator==(const DecomposedTransform &A, const DecomposedTransform &B)
485 {
486   return memcmp(&A, &B, sizeof(DecomposedTransform)) == 0;
487 }
488 
489 float4 transform_to_quat(const Transform &tfm);
490 void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size);
491 Transform transform_from_viewplane(BoundBox2D &viewplane);
492 
493 #endif
494 
495 /* TODO(sergey): This is only for until we've got OpenCL 2.0
496  * on all devices we consider supported. It'll be replaced with
497  * generic address space.
498  */
499 
500 #ifdef __KERNEL_OPENCL__
501 
502 #  define OPENCL_TRANSFORM_ADDRSPACE_GLUE(a, b) a##b
503 #  define OPENCL_TRANSFORM_ADDRSPACE_DECLARE(function) \
504     ccl_device_inline float3 OPENCL_TRANSFORM_ADDRSPACE_GLUE(function, _addrspace)( \
505         ccl_addr_space const Transform *t, const float3 a) \
506     { \
507       Transform private_tfm = *t; \
508       return function(&private_tfm, a); \
509     }
510 
511 OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_point)
512 OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction)
513 OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction_transposed)
514 
515 #  undef OPENCL_TRANSFORM_ADDRSPACE_DECLARE
516 #  undef OPENCL_TRANSFORM_ADDRSPACE_GLUE
517 #  define transform_point_auto transform_point_addrspace
518 #  define transform_direction_auto transform_direction_addrspace
519 #  define transform_direction_transposed_auto transform_direction_transposed_addrspace
520 #else
521 #  define transform_point_auto transform_point
522 #  define transform_direction_auto transform_direction
523 #  define transform_direction_transposed_auto transform_direction_transposed
524 #endif
525 
526 CCL_NAMESPACE_END
527 
528 #endif /* __UTIL_TRANSFORM_H__ */
529